diff options
Diffstat (limited to 'REORG.TODO/sysdeps/powerpc')
983 files changed, 77241 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/powerpc/Implies b/REORG.TODO/sysdeps/powerpc/Implies new file mode 100644 index 0000000000..78dba9510c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/Implies @@ -0,0 +1,5 @@ +# On PowerPC we use the IBM extended long double format. +ieee754/ldbl-128ibm +ieee754/ldbl-opt +ieee754/dbl-64 +ieee754/flt-32 diff --git a/REORG.TODO/sysdeps/powerpc/Makefile b/REORG.TODO/sysdeps/powerpc/Makefile new file mode 100644 index 0000000000..e03a202c65 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/Makefile @@ -0,0 +1,42 @@ +ifeq ($(subdir),string) +CFLAGS-memcmp.c += -Wno-uninitialized +endif + +ifeq ($(subdir),elf) +# extra shared linker files to link into dl-allobjs.so and libc +sysdep-dl-routines += dl-machine hwcapinfo +sysdep_routines += dl-machine hwcapinfo +# extra shared linker files to link only into dl-allobjs.so +sysdep-rtld-routines += dl-machine hwcapinfo +# Don't optimize GD tls sequence to LE. +LDFLAGS-tst-tlsopt-powerpc += -Wl,--no-tls-optimize +tests += tst-tlsopt-powerpc +endif + +ifeq ($(subdir),setjmp) +ifeq (yes,$(build-shared)) +sysdep_routines += novmx-longjmp novmx-sigjmp +endif +endif + +ifeq ($(subdir),csu) +# get offset to rtld_global._dl_hwcap and rtld_global._dl_hwcap2 +gen-as-const-headers += rtld-global-offsets.sym +# get offset to __locale_struct.__ctype_tolower +gen-as-const-headers += locale-defines.sym +endif + +ifeq ($(subdir),nptl) +tests-internal += test-get_hwcap test-get_hwcap-static +tests-static += test-get_hwcap-static +endif + +ifeq ($(subdir),misc) +sysdep_headers += sys/platform/ppc.h +tests += test-gettimebase +tests += tst-set_ppr +endif + +ifneq (,$(filter %le,$(config-machine))) +abilist-pattern = %-le.abilist +endif diff --git a/REORG.TODO/sysdeps/powerpc/Versions b/REORG.TODO/sysdeps/powerpc/Versions new file mode 100644 index 0000000000..95849668f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/Versions @@ -0,0 +1,31 @@ +libm { + GLIBC_2.1 { + # symbols used in macros from 
sysdeps/powerpc/bits/fenv.h + __fe_dfl_env; __fe_enabled_env; __fe_nonieee_env; __fe_nomask_env; + } + GLIBC_2.25 { + __fe_dfl_mode; + } +} + +libc { + GLIBC_2.3.4 { + _longjmp; __sigsetjmp; _setjmp; + longjmp; setjmp; + } + GLIBC_PRIVATE { + __novmx__libc_longjmp; __novmx__libc_siglongjmp; + __vmx__libc_longjmp; __vmx__libc_siglongjmp; + } +} + +ld { + GLIBC_2.22 { + __tls_get_addr_opt; + } + GLIBC_2.23 { + # Symbol used to version control when the ABI started to specify that HWCAP + # and AT_PLATFORM data should be stored into the TCB. + __parse_hwcap_and_convert_at_platform; + } +} diff --git a/REORG.TODO/sysdeps/powerpc/abort-instr.h b/REORG.TODO/sysdeps/powerpc/abort-instr.h new file mode 100644 index 0000000000..43746e65ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/abort-instr.h @@ -0,0 +1,2 @@ +/* An op-code of 0 is guaranteed to be illegal. */ +#define ABORT_INSTRUCTION asm (".long 0") diff --git a/REORG.TODO/sysdeps/powerpc/atomic-machine.h b/REORG.TODO/sysdeps/powerpc/atomic-machine.h new file mode 100644 index 0000000000..0a58203a10 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/atomic-machine.h @@ -0,0 +1,339 @@ +/* Atomic operations. PowerPC Common version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * Never include sysdeps/powerpc/atomic-machine.h directly. + * Alway use include/atomic.h which will include either + * sysdeps/powerpc/powerpc32/atomic-machine.h + * or + * sysdeps/powerpc/powerpc64/atomic-machine.h + * as appropriate and which in turn include this file. + */ + +#include <stdint.h> + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +/* + * Powerpc does not have byte and halfword forms of load and reserve and + * store conditional. So for powerpc we stub out the 8- and 16-bit forms. + */ +#define __arch_compare_and_exchange_bool_8_acq(mem, newval, oldval) \ + (abort (), 0) + +#define __arch_compare_and_exchange_bool_16_acq(mem, newval, oldval) \ + (abort (), 0) + +#ifdef UP +# define __ARCH_ACQ_INSTR "" +# define __ARCH_REL_INSTR "" +#else +# define __ARCH_ACQ_INSTR "isync" +# ifndef __ARCH_REL_INSTR +# define __ARCH_REL_INSTR "sync" +# endif +#endif + +#ifndef MUTEX_HINT_ACQ +# define MUTEX_HINT_ACQ +#endif +#ifndef MUTEX_HINT_REL +# define MUTEX_HINT_REL +#endif + +#define atomic_full_barrier() __asm ("sync" ::: "memory") + +#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile ( \ + "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " cmpw %0,%2\n" \ + " bne 2f\n" \ + " stwcx. 
%3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_compare_and_exchange_val_32_rel(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: lwarx %0,0,%1" MUTEX_HINT_REL "\n" \ + " cmpw %0,%2\n" \ + " bne 2f\n" \ + " stwcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_atomic_exchange_32_acq(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile ( \ + "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " stwcx. %3,0,%2\n" \ + " bne- 1b\n" \ + " " __ARCH_ACQ_INSTR \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_32_rel(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: lwarx %0,0,%2" MUTEX_HINT_REL "\n" \ + " stwcx. %3,0,%2\n" \ + " bne- 1b" \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_32(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: lwarx %0,0,%3\n" \ + " add %1,%0,%4\n" \ + " stwcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_32_acq(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: lwarx %0,0,%3" MUTEX_HINT_ACQ "\n" \ + " add %1,%0,%4\n" \ + " stwcx. 
%1,0,%3\n" \ + " bne- 1b\n" \ + __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_32_rel(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: lwarx %0,0,%3" MUTEX_HINT_REL "\n" \ + " add %1,%0,%4\n" \ + " stwcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_increment_val_32(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: lwarx %0,0,%2\n" \ + " addi %0,%0,1\n" \ + " stwcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_val_32(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: lwarx %0,0,%2\n" \ + " subi %0,%0,1\n" \ + " stwcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_if_positive_32(mem) \ + ({ int __val, __tmp; \ + __asm __volatile ("1: lwarx %0,0,%3\n" \ + " cmpwi 0,%0,0\n" \ + " addi %1,%0,-1\n" \ + " ble 2f\n" \ + " stwcx. 
%1,0,%3\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_compare_and_exchange_val_32_acq(mem, newval, oldval); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_compare_and_exchange_val_64_acq(mem, newval, oldval); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_compare_and_exchange_val_rel(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_compare_and_exchange_val_32_rel(mem, newval, oldval); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_compare_and_exchange_val_64_rel(mem, newval, oldval); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_exchange_acq(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_32_acq (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_64_acq (mem, value); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_exchange_rel(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_32_rel (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_64_rel (mem, value); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_exchange_and_add(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_and_add_32 (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_and_add_64 (mem, value); \ + else \ + abort (); \ + __result; \ + }) +#define atomic_exchange_and_add_acq(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_and_add_32_acq 
(mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_and_add_64_acq (mem, value); \ + else \ + abort (); \ + __result; \ + }) +#define atomic_exchange_and_add_rel(mem, value) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_exchange_and_add_32_rel (mem, value); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_exchange_and_add_64_rel (mem, value); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_increment_val(mem) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*(mem)) == 4) \ + __result = __arch_atomic_increment_val_32 (mem); \ + else if (sizeof (*(mem)) == 8) \ + __result = __arch_atomic_increment_val_64 (mem); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_increment(mem) ({ atomic_increment_val (mem); (void) 0; }) + +#define atomic_decrement_val(mem) \ + ({ \ + __typeof (*(mem)) __result; \ + if (sizeof (*(mem)) == 4) \ + __result = __arch_atomic_decrement_val_32 (mem); \ + else if (sizeof (*(mem)) == 8) \ + __result = __arch_atomic_decrement_val_64 (mem); \ + else \ + abort (); \ + __result; \ + }) + +#define atomic_decrement(mem) ({ atomic_decrement_val (mem); (void) 0; }) + + +/* Decrement *MEM if it is > 0, and return the old value. */ +#define atomic_decrement_if_positive(mem) \ + ({ __typeof (*(mem)) __result; \ + if (sizeof (*mem) == 4) \ + __result = __arch_atomic_decrement_if_positive_32 (mem); \ + else if (sizeof (*mem) == 8) \ + __result = __arch_atomic_decrement_if_positive_64 (mem); \ + else \ + abort (); \ + __result; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/bits/endian.h b/REORG.TODO/sysdeps/powerpc/bits/endian.h new file mode 100644 index 0000000000..db150e9efc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/endian.h @@ -0,0 +1,36 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* PowerPC can be little or big endian. Hopefully gcc will know... */ + +#ifndef _ENDIAN_H +# error "Never use <bits/endian.h> directly; include <endian.h> instead." +#endif + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +# if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# error Both BIG_ENDIAN and LITTLE_ENDIAN defined! +# endif +# define __BYTE_ORDER __BIG_ENDIAN +#else +# if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# define __BYTE_ORDER __LITTLE_ENDIAN +# else +# warning Cannot determine current byte order, assuming big-endian. +# define __BYTE_ORDER __BIG_ENDIAN +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/fenv.h b/REORG.TODO/sysdeps/powerpc/bits/fenv.h new file mode 100644 index 0000000000..c279b484f5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/fenv.h @@ -0,0 +1,180 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_H +# error "Never use <bits/fenv.h> directly; include <fenv.h> instead." +#endif + + +/* Define bits representing the exception. We use the bit positions of + the appropriate bits in the FPSCR... */ +enum + { + FE_INEXACT = +#define FE_INEXACT (1 << (31 - 6)) + FE_INEXACT, + FE_DIVBYZERO = +#define FE_DIVBYZERO (1 << (31 - 5)) + FE_DIVBYZERO, + FE_UNDERFLOW = +#define FE_UNDERFLOW (1 << (31 - 4)) + FE_UNDERFLOW, + FE_OVERFLOW = +#define FE_OVERFLOW (1 << (31 - 3)) + FE_OVERFLOW, + + /* ... except for FE_INVALID, for which we use bit 31. FE_INVALID + actually corresponds to bits 7 through 12 and 21 through 23 + in the FPSCR, but we can't use that because the current draft + says that it must be a power of 2. Instead we use bit 2 which + is the summary bit for all the FE_INVALID exceptions, which + kind of makes sense. */ + FE_INVALID = +#define FE_INVALID (1 << (31 - 2)) + FE_INVALID, + +#ifdef __USE_GNU + /* Breakdown of the FE_INVALID bits. Setting FE_INVALID on an + input to a routine is equivalent to setting all of these bits; + FE_INVALID will be set on output from a routine iff one of + these bits is set. Note, though, that you can't disable or + enable these exceptions individually. */ + + /* Operation with a sNaN. 
*/ + FE_INVALID_SNAN = +# define FE_INVALID_SNAN (1 << (31 - 7)) + FE_INVALID_SNAN, + + /* Inf - Inf */ + FE_INVALID_ISI = +# define FE_INVALID_ISI (1 << (31 - 8)) + FE_INVALID_ISI, + + /* Inf / Inf */ + FE_INVALID_IDI = +# define FE_INVALID_IDI (1 << (31 - 9)) + FE_INVALID_IDI, + + /* 0 / 0 */ + FE_INVALID_ZDZ = +# define FE_INVALID_ZDZ (1 << (31 - 10)) + FE_INVALID_ZDZ, + + /* Inf * 0 */ + FE_INVALID_IMZ = +# define FE_INVALID_IMZ (1 << (31 - 11)) + FE_INVALID_IMZ, + + /* Comparison with a NaN. */ + FE_INVALID_COMPARE = +# define FE_INVALID_COMPARE (1 << (31 - 12)) + FE_INVALID_COMPARE, + + /* Invalid operation flag for software (not set by hardware). */ + /* Note that some chips don't have this implemented, presumably + because no-one expected anyone to write software for them %-). */ + FE_INVALID_SOFTWARE = +# define FE_INVALID_SOFTWARE (1 << (31 - 21)) + FE_INVALID_SOFTWARE, + + /* Square root of negative number (including -Inf). */ + /* Note that some chips don't have this implemented. */ + FE_INVALID_SQRT = +# define FE_INVALID_SQRT (1 << (31 - 22)) + FE_INVALID_SQRT, + + /* Conversion-to-integer of a NaN or a number too large or too small. */ + FE_INVALID_INTEGER_CONVERSION = +# define FE_INVALID_INTEGER_CONVERSION (1 << (31 - 23)) + FE_INVALID_INTEGER_CONVERSION + +# define FE_ALL_INVALID \ + (FE_INVALID_SNAN | FE_INVALID_ISI | FE_INVALID_IDI | FE_INVALID_ZDZ \ + | FE_INVALID_IMZ | FE_INVALID_COMPARE | FE_INVALID_SOFTWARE \ + | FE_INVALID_SQRT | FE_INVALID_INTEGER_CONVERSION) +#endif + }; + +#define FE_ALL_EXCEPT \ + (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW | FE_INVALID) + +/* PowerPC chips support all of the four defined rounding modes. We + use the bit pattern in the FPSCR as the values for the + appropriate macros. 
*/ +enum + { + FE_TONEAREST = +#define FE_TONEAREST 0 + FE_TONEAREST, + FE_TOWARDZERO = +#define FE_TOWARDZERO 1 + FE_TOWARDZERO, + FE_UPWARD = +#define FE_UPWARD 2 + FE_UPWARD, + FE_DOWNWARD = +#define FE_DOWNWARD 3 + FE_DOWNWARD + }; + +/* Type representing exception flags. */ +typedef unsigned int fexcept_t; + +/* Type representing floating-point environment. We leave it as 'double' + for efficiency reasons (rather than writing it to a 32-bit integer). */ +typedef double fenv_t; + +/* If the default argument is used we use this value. */ +extern const fenv_t __fe_dfl_env; +#define FE_DFL_ENV (&__fe_dfl_env) + +#ifdef __USE_GNU +/* Floating-point environment where all exceptions are enabled. Note that + this is not sufficient to give you SIGFPE. */ +extern const fenv_t __fe_enabled_env; +# define FE_ENABLED_ENV (&__fe_enabled_env) + +/* Floating-point environment with (processor-dependent) non-IEEE floating + point. */ +extern const fenv_t __fe_nonieee_env; +# define FE_NONIEEE_ENV (&__fe_nonieee_env) + +/* Floating-point environment with all exceptions enabled. Note that + just evaluating this value does not change the processor exception mode. + Passing this mask to fesetenv will result in a prctl syscall to change + the MSR FE0/FE1 bits to "Precise Mode". On some processors this will + result in slower floating point execution. This will last until an + fenv or exception mask is installed that disables all FP exceptions. */ +# define FE_NOMASK_ENV FE_ENABLED_ENV + +/* Floating-point environment with all exceptions disabled. Note that + just evaluating this value does not change the processor exception mode. + Passing this mask to fesetenv will result in a prctl syscall to change + the MSR FE0/FE1 bits to "Ignore Exceptions Mode". On most processors + this allows the fastest possible floating point execution.*/ +# define FE_MASK_ENV FE_DFL_ENV + +#endif + +#if __GLIBC_USE (IEC_60559_BFP_EXT) +/* Type representing floating-point control modes. 
*/ +typedef double femode_t; + +/* Default floating-point control modes. */ +extern const femode_t __fe_dfl_mode; +# define FE_DFL_MODE (&__fe_dfl_mode) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/fenvinline.h b/REORG.TODO/sysdeps/powerpc/bits/fenvinline.h new file mode 100644 index 0000000000..4110bdfbbf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/fenvinline.h @@ -0,0 +1,79 @@ +/* Inline floating-point environment handling functions for powerpc. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__ + +/* Inline definition for fegetround. */ +# define __fegetround() \ + (__extension__ ({ int __fegetround_result; \ + __asm__ __volatile__ \ + ("mcrfs 7,7 ; mfcr %0" \ + : "=r"(__fegetround_result) : : "cr7"); \ + __fegetround_result & 3; })) +# define fegetround() __fegetround () + +# ifndef __NO_MATH_INLINES +/* The weird 'i#*X' constraints on the following suppress a gcc + warning when __excepts is not a constant. Otherwise, they mean the + same as just plain 'i'. */ + +# if __GNUC_PREREQ(3, 4) + +/* Inline definition for feraiseexcept. 
*/ +# define feraiseexcept(__excepts) \ + (__extension__ ({ \ + int __e = __excepts; \ + int __ret; \ + if (__builtin_constant_p (__e) \ + && (__e & (__e - 1)) == 0 \ + && __e != FE_INVALID) \ + { \ + if (__e != 0) \ + __asm__ __volatile__ ("mtfsb1 %0" \ + : : "i#*X" (__builtin_clz (__e))); \ + __ret = 0; \ + } \ + else \ + __ret = feraiseexcept (__e); \ + __ret; \ + })) + +/* Inline definition for feclearexcept. */ +# define feclearexcept(__excepts) \ + (__extension__ ({ \ + int __e = __excepts; \ + int __ret; \ + if (__builtin_constant_p (__e) \ + && (__e & (__e - 1)) == 0 \ + && __e != FE_INVALID) \ + { \ + if (__e != 0) \ + __asm__ __volatile__ ("mtfsb0 %0" \ + : : "i#*X" (__builtin_clz (__e))); \ + __ret = 0; \ + } \ + else \ + __ret = feclearexcept (__e); \ + __ret; \ + })) + +# endif /* __GNUC_PREREQ(3, 4). */ + +# endif /* !__NO_MATH_INLINES. */ + +#endif /* __GNUC__ && !_SOFT_FLOAT && !__NO_FPRS__ */ diff --git a/REORG.TODO/sysdeps/powerpc/bits/fp-fast.h b/REORG.TODO/sysdeps/powerpc/bits/fp-fast.h new file mode 100644 index 0000000000..9faf1b7c51 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/fp-fast.h @@ -0,0 +1,39 @@ +/* Define FP_FAST_* macros. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _MATH_H +# error "Never use <bits/fp-fast.h> directly; include <math.h> instead." +#endif + +#ifdef __USE_ISOC99 + +/* The GCC 4.6 compiler will define __FP_FAST_FMA{,F,L} if the fma{,f,l} + builtins are supported. */ +# if (!defined _SOFT_FLOAT && !defined __NO_FPRS__) || defined __FP_FAST_FMA +# define FP_FAST_FMA 1 +# endif + +# if (!defined _SOFT_FLOAT && !defined __NO_FPRS__) || defined __FP_FAST_FMAF +# define FP_FAST_FMAF 1 +# endif + +# ifdef __FP_FAST_FMAL +# define FP_FAST_FMAL 1 +# endif + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/hwcap.h b/REORG.TODO/sysdeps/powerpc/bits/hwcap.h new file mode 100644 index 0000000000..c9daeedfde --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/hwcap.h @@ -0,0 +1,71 @@ +/* Defines for bits in AT_HWCAP and AT_HWCAP2. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined(_SYS_AUXV_H) && !defined(_SYSDEPS_SYSDEP_H) +# error "Never include <bits/hwcap.h> directly; use <sys/auxv.h> instead." +#endif + +/* The bit numbers must match those in the kernel's asm/cputable.h. */ + +/* Feature definitions in AT_HWCAP. */ +#define PPC_FEATURE_32 0x80000000 /* 32-bit mode. */ +#define PPC_FEATURE_64 0x40000000 /* 64-bit mode. 
*/ +#define PPC_FEATURE_601_INSTR 0x20000000 /* 601 chip, Old POWER ISA. */ +#define PPC_FEATURE_HAS_ALTIVEC 0x10000000 /* SIMD/Vector Unit. */ +#define PPC_FEATURE_HAS_FPU 0x08000000 /* Floating Point Unit. */ +#define PPC_FEATURE_HAS_MMU 0x04000000 /* Memory Management Unit. */ +#define PPC_FEATURE_HAS_4xxMAC 0x02000000 /* 4xx Multiply Accumulator. */ +#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 /* Unified I/D cache. */ +#define PPC_FEATURE_HAS_SPE 0x00800000 /* Signal Processing ext. */ +#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000 /* SPE Float. */ +#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000 /* SPE Double. */ +#define PPC_FEATURE_NO_TB 0x00100000 /* 601/403gx have no timebase */ +#define PPC_FEATURE_POWER4 0x00080000 /* POWER4 ISA 2.00 */ +#define PPC_FEATURE_POWER5 0x00040000 /* POWER5 ISA 2.02 */ +#define PPC_FEATURE_POWER5_PLUS 0x00020000 /* POWER5+ ISA 2.03 */ +#define PPC_FEATURE_CELL_BE 0x00010000 /* CELL Broadband Engine */ +#define PPC_FEATURE_BOOKE 0x00008000 /* ISA Category Embedded */ +#define PPC_FEATURE_SMT 0x00004000 /* Simultaneous + Multi-Threading */ +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 +#define PPC_FEATURE_ARCH_2_05 0x00001000 /* ISA 2.05 */ +#define PPC_FEATURE_PA6T 0x00000800 /* PA Semi 6T Core */ +#define PPC_FEATURE_HAS_DFP 0x00000400 /* Decimal FP Unit */ +#define PPC_FEATURE_POWER6_EXT 0x00000200 /* P6 + mffgpr/mftgpr */ +#define PPC_FEATURE_ARCH_2_06 0x00000100 /* ISA 2.06 */ +#define PPC_FEATURE_HAS_VSX 0x00000080 /* P7 Vector Extension. */ +#define PPC_FEATURE_PSERIES_PERFMON_COMPAT 0x00000040 +#define PPC_FEATURE_TRUE_LE 0x00000002 +#define PPC_FEATURE_PPC_LE 0x00000001 + +/* Feature definitions in AT_HWCAP2. 
*/ +#define PPC_FEATURE2_ARCH_2_07 0x80000000 /* ISA 2.07 */ +#define PPC_FEATURE2_HAS_HTM 0x40000000 /* Hardware Transactional + Memory */ +#define PPC_FEATURE2_HAS_DSCR 0x20000000 /* Data Stream Control + Register */ +#define PPC_FEATURE2_HAS_EBB 0x10000000 /* Event Base Branching */ +#define PPC_FEATURE2_HAS_ISEL 0x08000000 /* Integer Select */ +#define PPC_FEATURE2_HAS_TAR 0x04000000 /* Target Address Register */ +#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 /* Target supports vector + instruction. */ +#define PPC_FEATURE2_HTM_NOSC 0x01000000 /* Kernel aborts transaction + when a syscall is made. */ +#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.0 */ +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float + 128-bit */ diff --git a/REORG.TODO/sysdeps/powerpc/bits/link.h b/REORG.TODO/sysdeps/powerpc/bits/link.h new file mode 100644 index 0000000000..1cab121a65 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/link.h @@ -0,0 +1,156 @@ +/* Machine-specific declarations for dynamic linker interface. PowerPC version + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." 
+#endif + + +#if __ELF_NATIVE_CLASS == 32 + +/* Registers for entry into PLT on PPC32. */ +typedef struct La_ppc32_regs +{ + uint32_t lr_reg[8]; + double lr_fp[8]; + uint32_t lr_vreg[12][4]; + uint32_t lr_r1; + uint32_t lr_lr; +} La_ppc32_regs; + +/* Return values for calls from PLT on PPC32. */ +typedef struct La_ppc32_retval +{ + uint32_t lrv_r3; + uint32_t lrv_r4; + double lrv_fp[8]; + uint32_t lrv_v2[4]; +} La_ppc32_retval; + + +__BEGIN_DECLS + +extern Elf32_Addr la_ppc32_gnu_pltenter (Elf32_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_ppc32_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_ppc32_gnu_pltexit (Elf32_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_ppc32_regs *__inregs, + La_ppc32_retval *__outregs, + const char *__symname); + +__END_DECLS + +#elif __ELF_NATIVE_CLASS == 64 +# if _CALL_ELF != 2 + +/* Registers for entry into PLT on PPC64. */ +typedef struct La_ppc64_regs +{ + uint64_t lr_reg[8]; + double lr_fp[13]; + uint32_t __padding; + uint32_t lr_vrsave; + uint32_t lr_vreg[12][4]; + uint64_t lr_r1; + uint64_t lr_lr; +} La_ppc64_regs; + +/* Return values for calls from PLT on PPC64. */ +typedef struct La_ppc64_retval +{ + uint64_t lrv_r3; + uint64_t lrv_r4; + double lrv_fp[4]; /* f1-f4, float - complex long double. */ + uint32_t lrv_v2[4]; /* v2. 
*/ +} La_ppc64_retval; + + +__BEGIN_DECLS + +extern Elf64_Addr la_ppc64_gnu_pltenter (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_ppc64_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_ppc64_gnu_pltexit (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_ppc64_regs *__inregs, + La_ppc64_retval *__outregs, + const char *__symname); + +__END_DECLS + +# else + +/* Registers for entry into PLT on PPC64 in the ELFv2 ABI. */ +typedef struct La_ppc64v2_regs +{ + uint64_t lr_reg[8]; + double lr_fp[13]; + uint32_t __padding; + uint32_t lr_vrsave; + uint32_t lr_vreg[12][4] __attribute__ ((aligned (16))); + uint64_t lr_r1; + uint64_t lr_lr; +} La_ppc64v2_regs; + +/* Return values for calls from PLT on PPC64 in the ELFv2 ABI. */ +typedef struct La_ppc64v2_retval +{ + uint64_t lrv_r3; + uint64_t lrv_r4; + double lrv_fp[10]; + uint32_t lrv_vreg[8][4] __attribute__ ((aligned (16))); +} La_ppc64v2_retval; + + +__BEGIN_DECLS + +extern Elf64_Addr la_ppc64v2_gnu_pltenter (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_ppc64v2_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_ppc64v2_gnu_pltexit (Elf64_Sym *__sym, + unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_ppc64v2_regs *__inregs, + La_ppc64v2_retval *__outregs, + const char *__symname); + +__END_DECLS + +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/bits/mathinline.h b/REORG.TODO/sysdeps/powerpc/bits/mathinline.h new file mode 100644 index 0000000000..e5f0cd30f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/mathinline.h @@ -0,0 +1,132 @@ +/* Inline math functions for powerpc. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_H +# error "Never use <bits/mathinline.h> directly; include <math.h> instead." +#endif + +#ifndef __extern_inline +# define __MATH_INLINE __inline +#else +# define __MATH_INLINE __extern_inline +#endif /* __cplusplus */ + +#if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__ + +#ifdef __USE_ISOC99 +# if !__GNUC_PREREQ (2,97) +# define __unordered_cmp(x, y) \ + (__extension__ \ + ({ __typeof__(x) __x = (x); __typeof__(y) __y = (y); \ + unsigned __r; \ + __asm__("fcmpu 7,%1,%2 ; mfcr %0" : "=r" (__r) : "f" (__x), "f"(__y) \ + : "cr7"); \ + __r; })) + +# undef isgreater +# undef isgreaterequal +# undef isless +# undef islessequal +# undef islessgreater +# undef isunordered + +# define isgreater(x, y) (__unordered_cmp (x, y) >> 2 & 1) +# define isgreaterequal(x, y) ((__unordered_cmp (x, y) & 6) != 0) +# define isless(x, y) (__unordered_cmp (x, y) >> 3 & 1) +# define islessequal(x, y) ((__unordered_cmp (x, y) & 0xA) != 0) +# define islessgreater(x, y) ((__unordered_cmp (x, y) & 0xC) != 0) +# define isunordered(x, y) (__unordered_cmp (x, y) & 1) + +# endif /* __GNUC_PREREQ (2,97) */ + +/* The gcc, version 2.7 or below, has problems with all this inlining + code. So disable it for this version of the compiler. */ +# if __GNUC_PREREQ (2, 8) +/* Test for negative number. 
Used in the signbit() macro. */ +__MATH_INLINE int +__NTH (__signbitf (float __x)) +{ +#if __GNUC_PREREQ (4, 0) + return __builtin_signbitf (__x); +#else + __extension__ union { float __f; int __i; } __u = { __f: __x }; + return __u.__i < 0; +#endif +} +__MATH_INLINE int +__NTH (__signbit (double __x)) +{ +#if __GNUC_PREREQ (4, 0) + return __builtin_signbit (__x); +#else + __extension__ union { double __d; long long __i; } __u = { __d: __x }; + return __u.__i < 0; +#endif +} +# ifdef __LONG_DOUBLE_128__ +__MATH_INLINE int +__NTH (__signbitl (long double __x)) +{ + return __signbit ((double) __x); +} +# endif +# endif +#endif /* __USE_ISOC99 */ + +#if !defined __NO_MATH_INLINES && defined __OPTIMIZE__ + +#ifdef __USE_ISOC99 + +# ifndef __powerpc64__ +__MATH_INLINE long int lrint (double __x) __THROW; +__MATH_INLINE long int +__NTH (lrint (double __x)) +{ + union { + double __d; + long long __ll; + } __u; + __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x)); + return __u.__ll; +} + +__MATH_INLINE long int lrintf (float __x) __THROW; +__MATH_INLINE long int +__NTH (lrintf (float __x)) +{ + return lrint ((double) __x); +} +# endif + +__MATH_INLINE double fdim (double __x, double __y) __THROW; +__MATH_INLINE double +__NTH (fdim (double __x, double __y)) +{ + return __x <= __y ? 0 : __x - __y; +} + +__MATH_INLINE float fdimf (float __x, float __y) __THROW; +__MATH_INLINE float +__NTH (fdimf (float __x, float __y)) +{ + return __x <= __y ? 0 : __x - __y; +} + +#endif /* __USE_ISOC99 */ +#endif /* !__NO_MATH_INLINES && __OPTIMIZE__ */ +#endif /* __GNUC__ && !_SOFT_FLOAT && !__NO_FPRS__ */ diff --git a/REORG.TODO/sysdeps/powerpc/bits/setjmp.h b/REORG.TODO/sysdeps/powerpc/bits/setjmp.h new file mode 100644 index 0000000000..02568951e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/bits/setjmp.h @@ -0,0 +1,50 @@ +/* Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define the machine-dependent type `jmp_buf'. PowerPC version. */ +#ifndef _BITS_SETJMP_H +#define _BITS_SETJMP_H 1 + +#if !defined _SETJMP_H && !defined _PTHREAD_H +# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead." +#endif + +/* The previous bits/setjmp.h had __jmp_buf defined as a structure. + We use an array of 'long int' instead, to make writing the + assembler easier. Naturally, user code should not depend on + either representation. */ + +#include <bits/wordsize.h> + +/* The current powerpc 32-bit Altivec ABI specifies for SVR4 ABI and EABI + the vrsave must be at byte 248 & v20 at byte 256. So we must pad this + correctly on 32 bit. It also insists that vecregs are only gauranteed + 4 byte alignment so we need to use vperm in the setjmp/longjmp routines. + We have to version the code because members like int __mask_was_saved + in the jmp_buf will move as jmp_buf is now larger than 248 bytes. We + cannot keep the altivec jmp_buf backward compatible with the jmp_buf. */ +#ifndef _ASM +# if __WORDSIZE == 64 +typedef long int __jmp_buf[64] __attribute__ ((__aligned__ (16))); +# else +/* The alignment is not essential, i.e.the buffer can be copied to a 4 byte + aligned buffer as per the ABI it is just added for performance reasons. 
*/ +typedef long int __jmp_buf[64 + (12 * 4)] __attribute__ ((__aligned__ (16))); +# endif +#endif + +#endif /* bits/setjmp.h */ diff --git a/REORG.TODO/sysdeps/powerpc/dl-procinfo.c b/REORG.TODO/sysdeps/powerpc/dl-procinfo.c new file mode 100644 index 0000000000..cd7329b84e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-procinfo.c @@ -0,0 +1,77 @@ +/* Data for processor capability information. PowerPC version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This information must be kept in sync with the _DL_HWCAP_COUNT + definition in procinfo.h. + + If anything should be added here check whether the size of each string + is still ok with the given array size. + + All the #ifdefs in the definitions are quite irritating but + necessary if we want to avoid duplicating the information. There + are three different modes: + + - PROCINFO_DECL is defined. This means we are only interested in + declarations. + + - PROCINFO_DECL is not defined: + + + if SHARED is defined the file is included in an array + initializer. The .element = { ... } syntax is needed. + + + if SHARED is not defined a normal array initialization is + needed. 
+ */ + +#ifndef PROCINFO_CLASS +# define PROCINFO_CLASS +#endif + +#if !defined PROCINFO_DECL && defined SHARED + ._dl_powerpc_cap_flags +#else +PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][10] +#endif +#ifndef PROCINFO_DECL += { + "ppcle", "true_le", "", "", + "", "", "archpmu", "vsx", + "arch_2_06", "power6x", "dfp", "pa6t", + "arch_2_05", "ic_snoop", "smt", "booke", + "cellbe", "power5+", "power5", "power4", + "notb", "efpdouble", "efpsingle", "spe", + "ucache", "4xxmac", "mmu", "fpu", + "altivec", "ppc601", "ppc64", "ppc32", + "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "", "", "", + "", "", "ieee128", "arch_3_00", + "htm-nosc", "vcrypto", "tar", "isel", + "ebb", "dscr", "htm", "arch_2_07", + } +#endif +#if !defined SHARED || defined PROCINFO_DECL +; +#else +, +#endif + +#undef PROCINFO_DECL +#undef PROCINFO_CLASS diff --git a/REORG.TODO/sysdeps/powerpc/dl-procinfo.h b/REORG.TODO/sysdeps/powerpc/dl-procinfo.h new file mode 100644 index 0000000000..216d20fbff --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-procinfo.h @@ -0,0 +1,185 @@ +/* Processor capability information handling macros. PowerPC version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _DL_PROCINFO_H +#define _DL_PROCINFO_H 1 + +#include <ldsodefs.h> +#include <sysdep.h> /* This defines the PPC_FEATURE[2]_* macros. */ + +/* The total number of available bits (including those prior to + _DL_HWCAP_FIRST). Some of these bits might not be used. */ +#define _DL_HWCAP_COUNT 64 + +/* Features started at bit 31 and decremented as new features were added. */ +#define _DL_HWCAP_LAST 31 + +/* AT_HWCAP2 features started at bit 31 and decremented as new features were + added. HWCAP2 feature bits start at bit 0. */ +#define _DL_HWCAP2_LAST 31 + +/* These bits influence library search. */ +#define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ + + PPC_FEATURE_HAS_DFP) + +#define _DL_PLATFORMS_COUNT 15 + +#define _DL_FIRST_PLATFORM 32 +/* Mask to filter out platforms. */ +#define _DL_HWCAP_PLATFORM (((1ULL << _DL_PLATFORMS_COUNT) - 1) \ + << _DL_FIRST_PLATFORM) + +/* Platform bits (relative to _DL_FIRST_PLATFORM). */ +#define PPC_PLATFORM_POWER4 0 +#define PPC_PLATFORM_PPC970 1 +#define PPC_PLATFORM_POWER5 2 +#define PPC_PLATFORM_POWER5_PLUS 3 +#define PPC_PLATFORM_POWER6 4 +#define PPC_PLATFORM_CELL_BE 5 +#define PPC_PLATFORM_POWER6X 6 +#define PPC_PLATFORM_POWER7 7 +#define PPC_PLATFORM_PPCA2 8 +#define PPC_PLATFORM_PPC405 9 +#define PPC_PLATFORM_PPC440 10 +#define PPC_PLATFORM_PPC464 11 +#define PPC_PLATFORM_PPC476 12 +#define PPC_PLATFORM_POWER8 13 +#define PPC_PLATFORM_POWER9 14 + +static inline const char * +__attribute__ ((unused)) +_dl_hwcap_string (int idx) +{ + return GLRO(dl_powerpc_cap_flags)[idx]; +} + +static inline int +__attribute__ ((unused)) +_dl_string_hwcap (const char *str) +{ + for (int i = 0; i < _DL_HWCAP_COUNT; ++i) + if (strcmp (str, _dl_hwcap_string (i)) == 0) + return i; + return -1; +} + +static inline int +__attribute__ ((unused, always_inline)) +_dl_string_platform (const char *str) +{ + if (str == NULL) + return -1; + + if (strncmp (str, "power", 5) == 0) + { + int ret; + str += 5; + switch (*str) + { + case '4': + ret = 
_DL_FIRST_PLATFORM + PPC_PLATFORM_POWER4; + break; + case '5': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER5; + if (str[1] == '+') + { + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER5_PLUS; + ++str; + } + break; + case '6': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER6; + if (str[1] == 'x') + { + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER6X; + ++str; + } + break; + case '7': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER7; + break; + case '8': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER8; + break; + case '9': + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER9; + break; + default: + return -1; + } + if (str[1] == '\0') + return ret; + } + else if (strncmp (str, "ppc", 3) == 0) + { + if (strcmp (str + 3, "970") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC970; + else if (strcmp (str + 3, "-cell-be") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_CELL_BE; + else if (strcmp (str + 3, "a2") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPCA2; + else if (strcmp (str + 3, "405") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC405; + else if (strcmp (str + 3, "440") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC440; + else if (strcmp (str + 3, "464") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC464; + else if (strcmp (str + 3, "476") == 0) + return _DL_FIRST_PLATFORM + PPC_PLATFORM_PPC476; + } + + return -1; +} + +#if IS_IN (rtld) +static inline int +__attribute__ ((unused)) +_dl_procinfo (unsigned int type, unsigned long int word) +{ + switch(type) + { + case AT_HWCAP: + _dl_printf ("AT_HWCAP: "); + + for (int i = 0; i <= _DL_HWCAP_LAST; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (i)); + break; + case AT_HWCAP2: + { + unsigned int offset = _DL_HWCAP_LAST + 1; + + _dl_printf ("AT_HWCAP2: "); + + /* We have to go through them all because the kernel added the + AT_HWCAP2 features starting with the high bits. 
*/ + for (int i = 0; i <= _DL_HWCAP2_LAST; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (offset + i)); + break; + } + default: + /* This should not happen. */ + return -1; + } + _dl_printf ("\n"); + return 0; +} +#endif + +#endif /* dl-procinfo.h */ diff --git a/REORG.TODO/sysdeps/powerpc/dl-tls.c b/REORG.TODO/sysdeps/powerpc/dl-tls.c new file mode 100644 index 0000000000..f666d53b49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-tls.c @@ -0,0 +1,24 @@ +/* Thread-local storage handling in the ELF dynamic linker. PowerPC version. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "elf/dl-tls.c" + +#ifdef SHARED +strong_alias(__tls_get_addr, __tls_get_addr_opt) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/dl-tls.h b/REORG.TODO/sysdeps/powerpc/dl-tls.h new file mode 100644 index 0000000000..54beaee5ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/dl-tls.h @@ -0,0 +1,52 @@ +/* Thread-local storage handling in the ELF dynamic linker. PowerPC version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _PPC_DL_TLS_H +# define _PPC_DL_TLS_H 1 + +/* Type used for the representation of TLS information in the TOC. */ +typedef struct +{ + unsigned long int ti_module; + unsigned long int ti_offset; +} tls_index; + +/* The thread pointer points 0x7000 past the first static TLS block. */ +#define TLS_TP_OFFSET 0x7000 + +/* Dynamic thread vector pointers point 0x8000 past the start of each + TLS block. */ +#define TLS_DTV_OFFSET 0x8000 + +/* Compute the value for a @tprel reloc. */ +#define TLS_TPREL_VALUE(sym_map, sym, reloc) \ + ((sym_map)->l_tls_offset + (sym)->st_value + (reloc)->r_addend \ + - TLS_TP_OFFSET) + +/* Compute the value for a @dtprel reloc. */ +#define TLS_DTPREL_VALUE(sym, reloc) \ + ((sym)->st_value + (reloc)->r_addend - TLS_DTV_OFFSET) + +#ifdef SHARED +extern void *__tls_get_addr (tls_index *ti); + +# define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) +# define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) +#endif + +#endif /* dl-tls.h */ diff --git a/REORG.TODO/sysdeps/powerpc/ffs.c b/REORG.TODO/sysdeps/powerpc/ffs.c new file mode 100644 index 0000000000..125683b0f9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/ffs.c @@ -0,0 +1,47 @@ +/* Find first set bit in a word, counted from least significant end. + For PowerPC. 
+ Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define ffsl __something_else +#include <limits.h> +#include <string.h> + +#undef ffs + +#ifdef __GNUC__ + +int +__ffs (int x) +{ + int cnt; + + asm ("cntlzw %0,%1" : "=r" (cnt) : "r" (x & -x)); + return 32 - cnt; +} +weak_alias (__ffs, ffs) +libc_hidden_def (__ffs) +libc_hidden_builtin_def (ffs) +#if ULONG_MAX == UINT_MAX +#undef ffsl +weak_alias (__ffs, ffsl) +#endif + +#else +#include <string/ffs.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/Makefile b/REORG.TODO/sysdeps/powerpc/fpu/Makefile new file mode 100644 index 0000000000..53470a9cf2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/Makefile @@ -0,0 +1,7 @@ +ifeq ($(subdir),math) +libm-support += fenv_const fe_nomask fe_mask t_sqrt +endif + +ifeq ($(subdir),stdlib) +tests += tst-setcontext-fpscr +endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_hypot.c b/REORG.TODO/sysdeps/powerpc/fpu/e_hypot.c new file mode 100644 index 0000000000..2685ca6ba0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_hypot.c @@ -0,0 +1,134 @@ +/* Pythagorean addition using doubles + Copyright (C) 2011-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +static const double two60 = 1.152921504606847e+18; +static const double two500 = 3.2733906078961419e+150; +static const double two600 = 4.149515568880993e+180; +static const double two1022 = 4.49423283715579e+307; +static const double twoM500 = 3.054936363499605e-151; +static const double twoM600 = 2.4099198651028841e-181; +static const double two60factor = 1.5592502418239997e+290; +static const double pdnum = 2.225073858507201e-308; + +/* __ieee754_hypot(x,y) + * + * This a FP only version without any FP->INT conversion. + * It is similar to default C version, making appropriates + * overflow and underflows checks as well scaling when it + * is needed. + */ + +#ifdef _ARCH_PWR7 +/* POWER7 isinf and isnan optimization are fast. */ +# define TEST_INF_NAN(x, y) \ + if ((isinf(x) || isinf(y)) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + if (isnan(x) || isnan(y)) \ + return x + y; +# else +/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are + * costly (especially for POWER6). 
*/ +# define GET_TW0_HIGH_WORD(d1,d2,i1,i2) \ + do { \ + ieee_double_shape_type gh_u1; \ + ieee_double_shape_type gh_u2; \ + gh_u1.value = (d1); \ + gh_u2.value = (d2); \ + (i1) = gh_u1.parts.msw & 0x7fffffff; \ + (i2) = gh_u2.parts.msw & 0x7fffffff; \ + } while (0) + +# define TEST_INF_NAN(x, y) \ + do { \ + uint32_t hx, hy; \ + GET_TW0_HIGH_WORD(x, y, hx, hy); \ + if (hy > hx) { \ + uint32_t ht = hx; hx = hy; hy = ht; \ + } \ + if (hx >= 0x7ff00000) { \ + if ((hx == 0x7ff00000 || hy == 0x7ff00000) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + return x + y; \ + } \ + } while (0) + +#endif + + +double +__ieee754_hypot (double x, double y) +{ + x = fabs (x); + y = fabs (y); + + TEST_INF_NAN (x, y); + + if (y > x) + { + double t = x; + x = y; + y = t; + } + if (y == 0.0) + return x; + /* if y is higher enough, y * 2^60 might overflow. The tests if + y >= 1.7976931348623157e+308/2^60 (two60factor) and uses the + appropriate check to avoid the overflow exception generation. */ + if (y > two60factor) + { + if ((x / y) > two60) + return x + y; + } + else + { + if (x > (y * two60)) + return x + y; + } + if (x > two500) + { + x *= twoM600; + y *= twoM600; + return __ieee754_sqrt (x * x + y * y) / twoM600; + } + if (y < twoM500) + { + if (y <= pdnum) + { + x *= two1022; + y *= two1022; + double ret = __ieee754_sqrt (x * x + y * y) / two1022; + math_check_force_underflow_nonneg (ret); + return ret; + } + else + { + x *= two600; + y *= two600; + return __ieee754_sqrt (x * x + y * y) / two600; + } + } + return __ieee754_sqrt (x * x + y * y); +} +strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_hypotf.c b/REORG.TODO/sysdeps/powerpc/fpu/e_hypotf.c new file mode 100644 index 0000000000..8502ca962a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_hypotf.c @@ -0,0 +1,76 @@ +/* Pythagorean addition using floats + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +/* __ieee754_hypotf(x,y) + + This a FP only version without any FP->INT conversion. + It is similar to default C version, making appropriates + overflow and underflows checks as using double precision + instead of scaling. */ + +#ifdef _ARCH_PWR7 +/* POWER7 isinf and isnan optimizations are fast. */ +# define TEST_INF_NAN(x, y) \ + if ((isinff(x) || isinff(y)) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + if (isnanf(x) || isnanf(y)) \ + return x + y; +# else +/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are + * costly (especially for POWER6). 
*/ +# define GET_TWO_FLOAT_WORD(f1,f2,i1,i2) \ + do { \ + ieee_float_shape_type gf_u1; \ + ieee_float_shape_type gf_u2; \ + gf_u1.value = (f1); \ + gf_u2.value = (f2); \ + (i1) = gf_u1.word & 0x7fffffff; \ + (i2) = gf_u2.word & 0x7fffffff; \ + } while (0) + +# define TEST_INF_NAN(x, y) \ + do { \ + uint32_t hx, hy; \ + GET_TWO_FLOAT_WORD(x, y, hx, hy); \ + if (hy > hx) { \ + uint32_t ht = hx; hx = hy; hy = ht; \ + } \ + if (hx >= 0x7f800000) { \ + if ((hx == 0x7f800000 || hy == 0x7f800000) \ + && !issignaling (x) && !issignaling (y)) \ + return INFINITY; \ + return x + y; \ + } \ + } while (0) +#endif + + +float +__ieee754_hypotf (float x, float y) +{ + TEST_INF_NAN (x, y); + + return __ieee754_sqrt ((double) x * x + (double) y * y); +} +strong_alias (__ieee754_hypotf, __hypotf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_rem_pio2f.c b/REORG.TODO/sysdeps/powerpc/fpu/e_rem_pio2f.c new file mode 100644 index 0000000000..8563e7c5e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_rem_pio2f.c @@ -0,0 +1,188 @@ +/* e_rem_pio2f.c -- float version of e_rem_pio2.c + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#include <math_private.h> +#include "s_float_bitwise.h" + +/* defined in sysdeps/powerpc/fpu/k_rem_pio2f.c */ +int __fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx); + +/* __ieee754_rem_pio2f(x,y) + * + * return the remainder of x rem pi/2 in y[0]+y[1] + */ + +static const float npio2_hw[] = { + 1.57077026e+00, 3.14154053e+00, 4.71228027e+00, 6.28308105e+00, + 7.85388184e+00, 9.42456055e+00, 1.09953613e+01, 1.25661621e+01, + 1.41369629e+01, 1.57077637e+01, 1.72783203e+01, 1.88491211e+01, + 2.04199219e+01, 2.19907227e+01, 2.35615234e+01, 2.51323242e+01, + 2.67031250e+01, 2.82739258e+01, 2.98447266e+01, 3.14155273e+01, + 3.29863281e+01, 3.45566406e+01, 3.61279297e+01, 3.76982422e+01, + 3.92695312e+01, 4.08398438e+01, 4.24111328e+01, 4.39814453e+01, + 4.55527344e+01, 4.71230469e+01, 4.86943359e+01, 5.02646484e+01 +}; + + +static const float zero = 0.0000000000e+00; +static const float two8 = 2.5600000000e+02; + +static const float half = 5.0000000000e-01; +static const float invpio2 = 6.3661980629e-01; +static const float pio2_1 = 1.5707855225e+00; +static const float pio2_1t = 1.0804334124e-05; +static const float pio2_2 = 1.0804273188e-05; +static const float pio2_2t = 6.0770999344e-11; +static const float pio2_3 = 6.0770943833e-11; +static const float pio2_3t = 6.1232342629e-17; + +static const float pio4 = 7.8539801e-01; +static const float pio3_4 = 2.3561945e+00; +static const float pio2_24b = 1.5707951e+00; +static const float pio2_2e7 = 2.0106054e+02; + + +int32_t +__ieee754_rem_pio2f (float x, float *y) +{ + float ax, z, n, r, w, t, e0; + float tx[3]; + int32_t i, nx; + + ax = __builtin_fabsf (x); + if (ax <= pio4) + { + y[0] = x; + y[1] = 0; + return 0; + } + if (ax < pio3_4) + { + if (x > 0) + { + z = x - pio2_1; + if (!__float_and_test28 (ax, pio2_24b)) + { + y[0] = z - pio2_1t; + y[1] = (z - y[0]) - pio2_1t; + } + else + { + z -= pio2_2; + y[0] = z - pio2_2t; + y[1] = (z - y[0]) - pio2_2t; + } + return 1; + } + 
else + { + z = x + pio2_1; + if (!__float_and_test28 (ax, pio2_24b)) + { + y[0] = z + pio2_1t; + y[1] = (z - y[0]) + pio2_1t; + } + else + { + z += pio2_2; + y[0] = z + pio2_2t; + y[1] = (z - y[0]) + pio2_2t; + } + return -1; + } + } + if (ax <= pio2_2e7) + { + n = __floorf (ax * invpio2 + half); + i = (int32_t) n; + r = ax - n * pio2_1; + w = n * pio2_1t; /* 1st round good to 40 bit */ + if (i < 32 && !__float_and_test24 (ax, npio2_hw[i - 1])) + { + y[0] = r - w; + } + else + { + float i, j; + j = __float_and8 (ax); + y[0] = r - w; + i = __float_and8 (y[0]); + if (j / i > 256.0 || j / i < 3.9062500e-3) + { /* 2nd iterations needed, good to 57 */ + t = r; + w = n * pio2_2; + r = t - w; + w = n * pio2_2t - ((t - r) - w); + y[0] = r - w; + i = __float_and8 (y[0]); + if (j / i > 33554432 || j / i < 2.9802322e-8) + { /* 3rd iteration needed, 74 bits acc */ + t = r; + w = n * pio2_3; + r = t - w; + w = n * pio2_3t - ((t - r) - w); + y[0] = r - w; + } + } + } + y[1] = (r - y[0]) - w; + if (x < 0) + { + y[0] = -y[0]; + y[1] = -y[1]; + return -i; + } + else + { + return i; + } + } + + /* all other (large) arguments */ + if (isnanf (x) || isinff (x)) + { + y[0] = y[1] = x - x; + return 0; + } + + /* set z = scalbn(|x|,ilogb(x)-7) */ + e0 = __float_and8 (ax / 128.0); + z = ax / e0; + + tx[0] = __floorf (z); + z = (z - tx[0]) * two8; + tx[1] = __floorf (z); + z = (z - tx[1]) * two8; + tx[2] = __floorf (z); + + nx = 3; + while (tx[nx - 1] == zero) + nx--; + + i = __fp_kernel_rem_pio2f (tx, y, e0, nx); + if (x < 0) + { + y[0] = -y[0]; + y[1] = -y[1]; + return -i; + } + return i; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_sqrt.c b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrt.c new file mode 100644 index 0000000000..1c8977d6ad --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrt.c @@ -0,0 +1,175 @@ +/* Double-precision floating point square root. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <fenv_libc.h> +#include <inttypes.h> +#include <stdint.h> +#include <sysdep.h> +#include <ldsodefs.h> + +#ifndef _ARCH_PPCSQ +static const double almost_half = 0.5000000000000001; /* 0.5 + 2^-53 */ +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two108 = 3.245185536584267269e+32; +static const float twom54 = 5.551115123125782702e-17; +extern const float __t_sqrt[1024]; + +/* The method is based on a description in + Computation of elementary functions on the IBM RISC System/6000 processor, + P. W. Markstein, IBM J. Res. Develop, 34(1) 1990. + Basically, it consists of two interleaved Newton-Raphson approximations, + one to find the actual square root, and one to find its reciprocal + without the expense of a division operation. The tricky bit here + is the use of the POWER/PowerPC multiply-add operation to get the + required accuracy with high speed. + + The argument reduction works by a combination of table lookup to + obtain the initial guesses, and some careful modification of the + generated guesses (which mostly runs on the integer unit, while the + Newton-Raphson is running on the FPU). 
*/ + +double +__slow_ieee754_sqrt (double x) +{ + const float inf = a_inf.value; + + if (x > 0) + { + /* schedule the EXTRACT_WORDS to get separation between the store + and the load. */ + ieee_double_shape_type ew_u; + ieee_double_shape_type iw_u; + ew_u.value = (x); + if (x != inf) + { + /* Variables named starting with 's' exist in the + argument-reduced space, so that 2 > sx >= 0.5, + 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... . + Variables named ending with 'i' are integer versions of + floating-point values. */ + double sx; /* The value of which we're trying to find the + square root. */ + double sg, g; /* Guess of the square root of x. */ + double sd, d; /* Difference between the square of the guess and x. */ + double sy; /* Estimate of 1/2g (overestimated by 1ulp). */ + double sy2; /* 2*sy */ + double e; /* Difference between y*g and 1/2 (se = e * fsy). */ + double shx; /* == sx * fsg */ + double fsg; /* sg*fsg == g. */ + fenv_t fe; /* Saved floating-point environment (stores rounding + mode and whether the inexact exception is + enabled). */ + uint32_t xi0, xi1, sxi, fsgi; + const float *t_sqrt; + + fe = fegetenv_register (); + /* complete the EXTRACT_WORDS (xi0,xi1,x) operation. */ + xi0 = ew_u.parts.msw; + xi1 = ew_u.parts.lsw; + relax_fenv_state (); + sxi = (xi0 & 0x3fffffff) | 0x3fe00000; + /* schedule the INSERT_WORDS (sx, sxi, xi1) to get separation + between the store and the load. */ + iw_u.parts.msw = sxi; + iw_u.parts.lsw = xi1; + t_sqrt = __t_sqrt + (xi0 >> (52 - 32 - 8 - 1) & 0x3fe); + sg = t_sqrt[0]; + sy = t_sqrt[1]; + /* complete the INSERT_WORDS (sx, sxi, xi1) operation. */ + sx = iw_u.value; + + /* Here we have three Newton-Raphson iterations each of a + division and a square root and the remainder of the + argument reduction, all interleaved. */ + sd = -__builtin_fma (sg, sg, -sx); + fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000; + sy2 = sy + sy; + sg = __builtin_fma (sy, sd, sg); /* 16-bit approximation to + sqrt(sx). 
*/ + + /* schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation + between the store and the load. */ + INSERT_WORDS (fsg, fsgi, 0); + iw_u.parts.msw = fsgi; + iw_u.parts.lsw = (0); + e = -__builtin_fma (sy, sg, -almost_half); + sd = -__builtin_fma (sg, sg, -sx); + if ((xi0 & 0x7ff00000) == 0) + goto denorm; + sy = __builtin_fma (e, sy2, sy); + sg = __builtin_fma (sy, sd, sg); /* 32-bit approximation to + sqrt(sx). */ + sy2 = sy + sy; + /* complete the INSERT_WORDS (fsg, fsgi, 0) operation. */ + fsg = iw_u.value; + e = -__builtin_fma (sy, sg, -almost_half); + sd = -__builtin_fma (sg, sg, -sx); + sy = __builtin_fma (e, sy2, sy); + shx = sx * fsg; + sg = __builtin_fma (sy, sd, sg); /* 64-bit approximation to + sqrt(sx), but perhaps + rounded incorrectly. */ + sy2 = sy + sy; + g = sg * fsg; + e = -__builtin_fma (sy, sg, -almost_half); + d = -__builtin_fma (g, sg, -shx); + sy = __builtin_fma (e, sy2, sy); + fesetenv_register (fe); + return __builtin_fma (sy, d, g); + denorm: + /* For denormalised numbers, we normalise, calculate the + square root, and return an adjusted result. */ + fesetenv_register (fe); + return __slow_ieee754_sqrt (x * two108) * twom54; + } + } + else if (x < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. 
*/ +#ifdef FE_INVALID_SQRT + __feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + __feraiseexcept (FE_INVALID); + x = a_nan.value; + } + return f_wash (x); +} +#endif /* _ARCH_PPCSQ */ + +#undef __ieee754_sqrt +double +__ieee754_sqrt (double x) +{ + double z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrt %0,%1\n" :"=f" (z):"f" (x)); +#else + z = __slow_ieee754_sqrt (x); +#endif + + return z; +} +strong_alias (__ieee754_sqrt, __sqrt_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/e_sqrtf.c b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrtf.c new file mode 100644 index 0000000000..65d27b4d42 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/e_sqrtf.c @@ -0,0 +1,150 @@ +/* Single-precision floating point square root. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_private.h> +#include <fenv_libc.h> +#include <inttypes.h> +#include <stdint.h> +#include <sysdep.h> +#include <ldsodefs.h> + +#ifndef _ARCH_PPCSQ +static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */ +static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 }; +static const ieee_float_shape_type a_inf = {.word = 0x7f800000 }; +static const float two48 = 281474976710656.0; +static const float twom24 = 5.9604644775390625e-8; +extern const float __t_sqrt[1024]; + +/* The method is based on a description in + Computation of elementary functions on the IBM RISC System/6000 processor, + P. W. Markstein, IBM J. Res. Develop, 34(1) 1990. + Basically, it consists of two interleaved Newton-Raphson approximations, + one to find the actual square root, and one to find its reciprocal + without the expense of a division operation. The tricky bit here + is the use of the POWER/PowerPC multiply-add operation to get the + required accuracy with high speed. + + The argument reduction works by a combination of table lookup to + obtain the initial guesses, and some careful modification of the + generated guesses (which mostly runs on the integer unit, while the + Newton-Raphson is running on the FPU). */ + +float +__slow_ieee754_sqrtf (float x) +{ + const float inf = a_inf.value; + + if (x > 0) + { + if (x != inf) + { + /* Variables named starting with 's' exist in the + argument-reduced space, so that 2 > sx >= 0.5, + 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... . + Variables named ending with 'i' are integer versions of + floating-point values. */ + float sx; /* The value of which we're trying to find the square + root. */ + float sg, g; /* Guess of the square root of x. */ + float sd, d; /* Difference between the square of the guess and x. */ + float sy; /* Estimate of 1/2g (overestimated by 1ulp). */ + float sy2; /* 2*sy */ + float e; /* Difference between y*g and 1/2 (note that e==se). 
*/ + float shx; /* == sx * fsg */ + float fsg; /* sg*fsg == g. */ + fenv_t fe; /* Saved floating-point environment (stores rounding + mode and whether the inexact exception is + enabled). */ + uint32_t xi, sxi, fsgi; + const float *t_sqrt; + + GET_FLOAT_WORD (xi, x); + fe = fegetenv_register (); + relax_fenv_state (); + sxi = (xi & 0x3fffffff) | 0x3f000000; + SET_FLOAT_WORD (sx, sxi); + t_sqrt = __t_sqrt + (xi >> (23 - 8 - 1) & 0x3fe); + sg = t_sqrt[0]; + sy = t_sqrt[1]; + + /* Here we have three Newton-Raphson iterations each of a + division and a square root and the remainder of the + argument reduction, all interleaved. */ + sd = -__builtin_fmaf (sg, sg, -sx); + fsgi = (xi + 0x40000000) >> 1 & 0x7f800000; + sy2 = sy + sy; + sg = __builtin_fmaf (sy, sd, sg); /* 16-bit approximation to + sqrt(sx). */ + e = -__builtin_fmaf (sy, sg, -almost_half); + SET_FLOAT_WORD (fsg, fsgi); + sd = -__builtin_fmaf (sg, sg, -sx); + sy = __builtin_fmaf (e, sy2, sy); + if ((xi & 0x7f800000) == 0) + goto denorm; + shx = sx * fsg; + sg = __builtin_fmaf (sy, sd, sg); /* 32-bit approximation to + sqrt(sx), but perhaps + rounded incorrectly. */ + sy2 = sy + sy; + g = sg * fsg; + e = -__builtin_fmaf (sy, sg, -almost_half); + d = -__builtin_fmaf (g, sg, -shx); + sy = __builtin_fmaf (e, sy2, sy); + fesetenv_register (fe); + return __builtin_fmaf (sy, d, g); + denorm: + /* For denormalised numbers, we normalise, calculate the + square root, and return an adjusted result. */ + fesetenv_register (fe); + return __slow_ieee754_sqrtf (x * two48) * twom24; + } + } + else if (x < 0) + { + /* For some reason, some PowerPC32 processors don't implement + FE_INVALID_SQRT. 
*/ +#ifdef FE_INVALID_SQRT + feraiseexcept (FE_INVALID_SQRT); + + fenv_union_t u = { .fenv = fegetenv_register () }; + if ((u.l & FE_INVALID) == 0) +#endif + feraiseexcept (FE_INVALID); + x = a_nan.value; + } + return f_washf (x); +} +#endif /* _ARCH_PPCSQ */ + +#undef __ieee754_sqrtf +float +__ieee754_sqrtf (float x) +{ + double z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x)); +#else + z = __slow_ieee754_sqrtf (x); +#endif + + return z; +} +strong_alias (__ieee754_sqrtf, __sqrtf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fclrexcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/fclrexcpt.c new file mode 100644 index 0000000000..2ee9547833 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fclrexcpt.c @@ -0,0 +1,49 @@ +/* Clear given exceptions in current floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#undef feclearexcept +int +__feclearexcept (int excepts) +{ + fenv_union_t u, n; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* Clear the relevant bits. */ + n.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID) + | (excepts & FPSCR_STICKY_BITS)); + + /* Put the new state in effect. 
*/ + if (u.l != n.l) + fesetenv_register (n.fenv); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fe_mask.c b/REORG.TODO/sysdeps/powerpc/fpu/fe_mask.c new file mode 100644 index 0000000000..bbe41a9d92 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fe_mask.c @@ -0,0 +1,32 @@ +/* Procedure definition for FE_MASK_ENV. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <errno.h> + +/* This is a generic stub. An OS specific override is required to clear + the FE0/FE1 bits in the MSR. MSR update is privileged, so this will + normally involve a syscall. 
*/ + +const fenv_t * +__fe_mask_env(void) +{ + __set_errno (ENOSYS); + return FE_DFL_ENV; +} +stub_warning (__fe_mask_env) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fe_nomask.c b/REORG.TODO/sysdeps/powerpc/fpu/fe_nomask.c new file mode 100644 index 0000000000..3b42aedd15 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fe_nomask.c @@ -0,0 +1,32 @@ +/* Procedure definition for FE_NOMASK_ENV. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <errno.h> + +/* This is a generic stub. An OS specific override is required to set + the FE0/FE1 bits in the MSR. MSR update is privileged, so this will + normally involve a syscall. */ + +const fenv_t * +__fe_nomask_env_priv (void) +{ + __set_errno (ENOSYS); + return FE_ENABLED_ENV; +} +stub_warning (__fe_nomask_env_priv) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fedisblxcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/fedisblxcpt.c new file mode 100644 index 0000000000..bb10b4701a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fedisblxcpt.c @@ -0,0 +1,57 @@ +/* Disable floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Geoffrey Keating <geoffk@geoffk.org>, 2000. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv_libc.h>
+
+/* Disable the exception traps named in EXCEPTS (a bitmask of FE_*
+   values).  Returns the bitmask of traps that were enabled before the
+   call, or -1 if some trap named in EXCEPTS is still enabled on exit.  */
+int
+fedisableexcept (int excepts)
+{
+  fenv_union_t fe, curr;
+  int result, new;
+
+  /* Get current exception mask to return.  */
+  fe.fenv = curr.fenv = fegetenv_register ();
+  result = fenv_reg_to_exceptions (fe.l);
+
+  /* The FE_ALL_INVALID sub-exceptions have no individual enable bits;
+     they are all controlled by the single VE bit (cleared below for
+     FE_INVALID), so fold a request for all of them into FE_INVALID.  */
+  if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
+    excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
+
+  /* Sets the new exception mask.  */
+  if (excepts & FE_INEXACT)
+    fe.l &= ~(1 << (31 - FPSCR_XE));
+  if (excepts & FE_DIVBYZERO)
+    fe.l &= ~(1 << (31 - FPSCR_ZE));
+  if (excepts & FE_UNDERFLOW)
+    fe.l &= ~(1 << (31 - FPSCR_UE));
+  if (excepts & FE_OVERFLOW)
+    fe.l &= ~(1 << (31 - FPSCR_OE));
+  if (excepts & FE_INVALID)
+    fe.l &= ~(1 << (31 - FPSCR_VE));
+
+  /* Only write the FPSCR back if something actually changed.  */
+  if (fe.l != curr.l)
+    fesetenv_register (fe.fenv);
+
+  /* If no traps remain enabled at all, also mask SIGFPE via the MSR
+     FE0/FE1 bits (see __fe_mask_env); this may let the FPU run
+     faster.  */
+  new = __fegetexcept ();
+  if (new == 0 && result != 0)
+    (void)__fe_mask_env ();
+
+  /* Fail if any trap we were asked to disable is still enabled.  */
+  if ((new & excepts) != 0)
+    result = -1;
+  return result;
+}
diff --git a/REORG.TODO/sysdeps/powerpc/fpu/feenablxcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/feenablxcpt.c
new file mode 100644
index 0000000000..7f0b333fc6
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/fpu/feenablxcpt.c
@@ -0,0 +1,58 @@
+/* Enable floating-point exceptions.
+   Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Geoffrey Keating <geoffk@geoffk.org>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv_libc.h>
+
+/* Enable the exception traps named in EXCEPTS (a bitmask of FE_*
+   values).  Returns the bitmask of traps that were enabled before the
+   call, or -1 if some trap named in EXCEPTS is not enabled on exit.  */
+int
+feenableexcept (int excepts)
+{
+  fenv_union_t fe, curr;
+  int result, new;
+
+  /* Get current exception mask to return.  */
+  fe.fenv = curr.fenv = fegetenv_register ();
+  result = fenv_reg_to_exceptions (fe.l);
+
+  /* The FE_ALL_INVALID sub-exceptions have no individual enable bits;
+     they are all controlled by the single VE bit (set below for
+     FE_INVALID), so fold a request for all of them into FE_INVALID.  */
+  if ((excepts & FE_ALL_INVALID) == FE_ALL_INVALID)
+    excepts = (excepts | FE_INVALID) & ~ FE_ALL_INVALID;
+
+  /* Sets the new exception mask.  */
+  if (excepts & FE_INEXACT)
+    fe.l |= (1 << (31 - FPSCR_XE));
+  if (excepts & FE_DIVBYZERO)
+    fe.l |= (1 << (31 - FPSCR_ZE));
+  if (excepts & FE_UNDERFLOW)
+    fe.l |= (1 << (31 - FPSCR_UE));
+  if (excepts & FE_OVERFLOW)
+    fe.l |= (1 << (31 - FPSCR_OE));
+  if (excepts & FE_INVALID)
+    fe.l |= (1 << (31 - FPSCR_VE));
+
+  /* Only write the FPSCR back if something actually changed.  */
+  if (fe.l != curr.l)
+    fesetenv_register (fe.fenv);
+
+  /* If traps were all disabled before but at least one is enabled now,
+     unmask SIGFPE in the MSR FE0/FE1 bits (privileged operation; see
+     __fe_nomask_env_priv).  */
+  new = __fegetexcept ();
+  if (new != 0 && result == 0)
+    (void) __fe_nomask_env_priv ();
+
+  /* Fail unless every requested trap is now enabled.  */
+  if ((new & excepts) != excepts)
+    result = -1;
+
+  return result;
+}
diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetenv.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetenv.c
new file mode 100644
index 0000000000..251977beba
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetenv.c
@@ -0,0 +1,38 @@
+/* Store current floating-point environment.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetenv (fenv_t *envp) +{ + *envp = fegetenv_register (); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fegetenv) +libm_hidden_ver (__fegetenv, fegetenv) +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetexcept.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetexcept.c new file mode 100644 index 0000000000..2b9899abe2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetexcept.c @@ -0,0 +1,43 @@ +/* Get floating-point exceptions. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Geoffrey Keating <geoffk@geoffk.org>, 2000. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetexcept (void) +{ + fenv_union_t fe; + int result = 0; + + fe.fenv = fegetenv_register (); + + if (fe.l & (1 << (31 - FPSCR_XE))) + result |= FE_INEXACT; + if (fe.l & (1 << (31 - FPSCR_ZE))) + result |= FE_DIVBYZERO; + if (fe.l & (1 << (31 - FPSCR_UE))) + result |= FE_UNDERFLOW; + if (fe.l & (1 << (31 - FPSCR_OE))) + result |= FE_OVERFLOW; + if (fe.l & (1 << (31 - FPSCR_VE))) + result |= FE_INVALID; + + return result; +} +weak_alias (__fegetexcept, fegetexcept) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetmode.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetmode.c new file mode 100644 index 0000000000..5597000a4b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetmode.c @@ -0,0 +1,26 @@ +/* Store current floating-point control modes. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +int +fegetmode (femode_t *modep) +{ + *modep = fegetenv_register (); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fegetround.c b/REORG.TODO/sysdeps/powerpc/fpu/fegetround.c new file mode 100644 index 0000000000..bedb02f0e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fegetround.c @@ -0,0 +1,30 @@ +/* Return current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +(__fegetround) (void) +{ + return __fegetround(); +} +#undef fegetround +#undef __fegetround +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/feholdexcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/feholdexcpt.c new file mode 100644 index 0000000000..fef49c33a9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/feholdexcpt.c @@ -0,0 +1,51 @@ +/* Store current floating-point environment and clear exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv_libc.h>
+#include <fpu_control.h>
+#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM)
+
+/* Save the current environment in *ENVP, then clear all exception
+   flags and disable all exception traps, keeping only the rounding
+   mode and the non-IEEE (NI) mode bit.  Always returns 0.  */
+int
+__feholdexcept (fenv_t *envp)
+{
+  fenv_union_t old, new;
+
+  /* Save the currently set exceptions.  */
+  old.fenv = *envp = fegetenv_register ();
+
+  /* Clear everything except for the rounding modes and non-IEEE arithmetic
+     flag.  FPSCR bits 1:0 hold the rounding mode and bit 2 is the NI
+     bit; the high word is the NaN header of the fenv_t double (see
+     fenv_const.c) and must be preserved.  */
+  new.l = old.l & 0xffffffff00000007LL;
+
+  /* Nothing to do: no flags were set and no traps were enabled.  */
+  if (new.l == old.l)
+    return 0;
+
+  /* If the old env had any enabled exceptions, then mask SIGFPE in the
+     MSR FE0/FE1 bits.  This may allow the FPU to run faster because it
+     always takes the default action and can not generate SIGFPE.  */
+  if ((old.l & _FPU_MASK_ALL) != 0)
+    (void)__fe_mask_env ();
+
+  /* Put the new state in effect.  */
+  fesetenv_register (new.fenv);
+
+  return 0;
+}
+libm_hidden_def (__feholdexcept)
+weak_alias (__feholdexcept, feholdexcept)
+libm_hidden_weak (feholdexcept)
diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fenv_const.c b/REORG.TODO/sysdeps/powerpc/fpu/fenv_const.c
new file mode 100644
index 0000000000..c5e088c98e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/fpu/fenv_const.c
@@ -0,0 +1,36 @@
+/* Constants for fenv_bits.h.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* We want to specify the bit pattern of the __fe_*_env constants, so + pretend they're really `long long' instead of `double'. */ + +/* If the default argument is used we use this value. */ +const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) = +0xfff8000000000000ULL; + +/* The same representation is used for femode_t. */ +extern const unsigned long long __fe_dfl_mode + __attribute__ ((aligned (8), alias ("__fe_dfl_env"))); + +/* Floating-point environment where none of the exceptions are masked. */ +const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) = +0xfff80000000000f8ULL; + +/* Floating-point environment with the NI bit set. */ +const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) = +0xfff8000000000004ULL; diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fenv_libc.h b/REORG.TODO/sysdeps/powerpc/fpu/fenv_libc.h new file mode 100644 index 0000000000..500ac042b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fenv_libc.h @@ -0,0 +1,176 @@ +/* Internal libc stuff for floating point environment routines. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FENV_LIBC_H +#define _FENV_LIBC_H 1 + +#include <fenv.h> +#include <ldsodefs.h> +#include <sysdep.h> + +extern const fenv_t *__fe_nomask_env_priv (void); + +extern const fenv_t *__fe_mask_env (void) attribute_hidden; + +/* The sticky bits in the FPSCR indicating exceptions have occurred. */ +#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID) + +/* Equivalent to fegetenv, but returns a fenv_t instead of taking a + pointer. */ +#define fegetenv_register() \ + ({ fenv_t env; asm volatile ("mffs %0" : "=f" (env)); env; }) + +/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer. */ +#define fesetenv_register(env) \ + do { \ + double d = (env); \ + if(GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \ + asm volatile (".machine push; " \ + ".machine \"power6\"; " \ + "mtfsf 0xff,%0,1,0; " \ + ".machine pop" : : "f" (d)); \ + else \ + asm volatile ("mtfsf 0xff,%0" : : "f" (d)); \ + } while(0) + +/* This very handy macro: + - Sets the rounding mode to 'round to nearest'; + - Sets the processor into IEEE mode; and + - Prevents exceptions from being raised for inexact results. + These things happen to be exactly what you need for typical elementary + functions. 
*/ +#define relax_fenv_state() \ + do { \ + if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \ + asm (".machine push; .machine \"power6\"; " \ + "mtfsfi 7,0,1; .machine pop"); \ + asm ("mtfsfi 7,0"); \ + } while(0) + +/* Set/clear a particular FPSCR bit (for instance, + reset_fpscr_bit(FPSCR_VE); + prevents INVALID exceptions from being raised). */ +#define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "i"(x)) +#define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "i"(x)) + +typedef union +{ + fenv_t fenv; + unsigned long long l; +} fenv_union_t; + + +static inline int +__fesetround_inline (int round) +{ + if ((unsigned int) round < 2) + { + asm volatile ("mtfsb0 30"); + if ((unsigned int) round == 0) + asm volatile ("mtfsb0 31"); + else + asm volatile ("mtfsb1 31"); + } + else + { + asm volatile ("mtfsb1 30"); + if ((unsigned int) round == 2) + asm volatile ("mtfsb0 31"); + else + asm volatile ("mtfsb1 31"); + } + + return 0; +} + +/* Definitions of all the FPSCR bit numbers */ +enum { + FPSCR_FX = 0, /* exception summary */ + FPSCR_FEX, /* enabled exception summary */ + FPSCR_VX, /* invalid operation summary */ + FPSCR_OX, /* overflow */ + FPSCR_UX, /* underflow */ + FPSCR_ZX, /* zero divide */ + FPSCR_XX, /* inexact */ + FPSCR_VXSNAN, /* invalid operation for sNaN */ + FPSCR_VXISI, /* invalid operation for Inf-Inf */ + FPSCR_VXIDI, /* invalid operation for Inf/Inf */ + FPSCR_VXZDZ, /* invalid operation for 0/0 */ + FPSCR_VXIMZ, /* invalid operation for Inf*0 */ + FPSCR_VXVC, /* invalid operation for invalid compare */ + FPSCR_FR, /* fraction rounded [fraction was incremented by round] */ + FPSCR_FI, /* fraction inexact */ + FPSCR_FPRF_C, /* result class descriptor */ + FPSCR_FPRF_FL, /* result less than (usually, less than 0) */ + FPSCR_FPRF_FG, /* result greater than */ + FPSCR_FPRF_FE, /* result equal to */ + FPSCR_FPRF_FU, /* result unordered */ + FPSCR_20, /* reserved */ + FPSCR_VXSOFT, /* invalid operation set by software */ + FPSCR_VXSQRT, /* invalid 
operation for square root */ + FPSCR_VXCVI, /* invalid operation for invalid integer convert */ + FPSCR_VE, /* invalid operation exception enable */ + FPSCR_OE, /* overflow exception enable */ + FPSCR_UE, /* underflow exception enable */ + FPSCR_ZE, /* zero divide exception enable */ + FPSCR_XE, /* inexact exception enable */ +#ifdef _ARCH_PWR6 + FPSCR_29, /* Reserved in ISA 2.05 */ +#else + FPSCR_NI /* non-IEEE mode (typically, no denormalised numbers) */ +#endif /* _ARCH_PWR6 */ + /* the remaining two least-significant bits keep the rounding mode */ +}; + +static inline int +fenv_reg_to_exceptions (unsigned long long l) +{ + int result = 0; + if (l & (1 << (31 - FPSCR_XE))) + result |= FE_INEXACT; + if (l & (1 << (31 - FPSCR_ZE))) + result |= FE_DIVBYZERO; + if (l & (1 << (31 - FPSCR_UE))) + result |= FE_UNDERFLOW; + if (l & (1 << (31 - FPSCR_OE))) + result |= FE_OVERFLOW; + if (l & (1 << (31 - FPSCR_VE))) + result |= FE_INVALID; + return result; +} + +#ifdef _ARCH_PWR6 + /* Not supported in ISA 2.05. Provided for source compat only. */ +# define FPSCR_NI 29 +#endif /* _ARCH_PWR6 */ + +/* This operation (i) sets the appropriate FPSCR bits for its + parameter, (ii) converts sNaN to the corresponding qNaN, and (iii) + otherwise passes its parameter through unchanged (in particular, -0 + and +0 stay as they were). The `obvious' way to do this is optimised + out by gcc. */ +#define f_wash(x) \ + ({ double d; asm volatile ("fmul %0,%1,%2" \ + : "=f"(d) \ + : "f" (x), "f"((float)1.0)); d; }) +#define f_washf(x) \ + ({ float f; asm volatile ("fmuls %0,%1,%2" \ + : "=f"(f) \ + : "f" (x), "f"((float)1.0)); f; }) + +#endif /* fenv_libc.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fenv_private.h b/REORG.TODO/sysdeps/powerpc/fpu/fenv_private.h new file mode 100644 index 0000000000..877f25bcf2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fenv_private.h @@ -0,0 +1,229 @@ +/* Private floating point rounding and exceptions handling. PowerPC version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FENV_PRIVATE_H +#define FENV_PRIVATE_H 1 + +#include <fenv.h> +#include <fenv_libc.h> +#include <fpu_control.h> + +/* Mask for the exception enable bits. */ +#define _FPU_ALL_TRAPS (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \ + | _FPU_MASK_XM | _FPU_MASK_IM) + +/* Mask the rounding mode bits. */ +#define _FPU_MASK_RN (~0x3) + +/* Mask everything but the rounding moded and non-IEEE arithmetic flags. */ +#define _FPU_MASK_NOT_RN_NI 0xffffffff00000007LL + +/* Mask restore rounding mode and exception enabled. */ +#define _FPU_MASK_TRAPS_RN 0xffffffff1fffff00LL + +/* Mask exception enable but fraction rounded/inexact and FP result/CC + bits. */ +#define _FPU_MASK_FRAC_INEX_RET_CC 0xffffffff1ff80fff + +static __always_inline void +__libc_feholdbits_ppc (fenv_t *envp, unsigned long long mask, + unsigned long long bits) +{ + fenv_union_t old, new; + + old.fenv = *envp = fegetenv_register (); + + new.l = (old.l & mask) | bits; + + /* If the old env had any enabled exceptions, then mask SIGFPE in the + MSR FE0/FE1 bits. This may allow the FPU to run faster because it + always takes the default action and can not generate SIGFPE. 
*/ + if ((old.l & _FPU_ALL_TRAPS) != 0) + (void) __fe_mask_env (); + + fesetenv_register (new.fenv); +} + +static __always_inline void +libc_feholdexcept_ppc (fenv_t *envp) +{ + __libc_feholdbits_ppc (envp, _FPU_MASK_NOT_RN_NI, 0LL); +} + +static __always_inline void +libc_feholdexcept_setround_ppc (fenv_t *envp, int r) +{ + __libc_feholdbits_ppc (envp, _FPU_MASK_NOT_RN_NI & _FPU_MASK_RN, r); +} + +static __always_inline void +libc_fesetround_ppc (int r) +{ + __fesetround_inline (r); +} + +static __always_inline int +libc_fetestexcept_ppc (int e) +{ + fenv_union_t u; + u.fenv = fegetenv_register (); + return u.l & e; +} + +static __always_inline void +libc_feholdsetround_ppc (fenv_t *e, int r) +{ + __libc_feholdbits_ppc (e, _FPU_MASK_TRAPS_RN, r); +} + +static __always_inline unsigned long long +__libc_femergeenv_ppc (const fenv_t *envp, unsigned long long old_mask, + unsigned long long new_mask) +{ + fenv_union_t old, new; + + new.fenv = *envp; + old.fenv = fegetenv_register (); + + /* Merge bits while masking unwanted bits from new and old env. */ + new.l = (old.l & old_mask) | (new.l & new_mask); + + /* If the old env has no enabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the + hardware into "precise mode" and may cause the FPU to run slower on some + hardware. */ + if ((old.l & _FPU_ALL_TRAPS) == 0 && (new.l & _FPU_ALL_TRAPS) != 0) + (void) __fe_nomask_env_priv (); + + /* If the old env had any enabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. */ + if ((old.l & _FPU_ALL_TRAPS) != 0 && (new.l & _FPU_ALL_TRAPS) == 0) + (void) __fe_mask_env (); + + /* Atomically enable and raise (if appropriate) exceptions set in `new'. 
*/ + fesetenv_register (new.fenv); + + return old.l; +} + +static __always_inline void +libc_fesetenv_ppc (const fenv_t *envp) +{ + /* Replace the entire environment. */ + __libc_femergeenv_ppc (envp, 0LL, -1LL); +} + +static __always_inline void +libc_feresetround_ppc (fenv_t *envp) +{ + __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN, _FPU_MASK_FRAC_INEX_RET_CC); +} + +static __always_inline int +libc_feupdateenv_test_ppc (fenv_t *envp, int ex) +{ + return __libc_femergeenv_ppc (envp, _FPU_MASK_TRAPS_RN, + _FPU_MASK_FRAC_INEX_RET_CC) & ex; +} + +static __always_inline void +libc_feupdateenv_ppc (fenv_t *e) +{ + libc_feupdateenv_test_ppc (e, 0); +} + +#define libc_feholdexceptf libc_feholdexcept_ppc +#define libc_feholdexcept libc_feholdexcept_ppc +#define libc_feholdexcept_setroundf libc_feholdexcept_setround_ppc +#define libc_feholdexcept_setround libc_feholdexcept_setround_ppc +#define libc_fetestexceptf libc_fetestexcept_ppc +#define libc_fetestexcept libc_fetestexcept_ppc +#define libc_fesetroundf libc_fesetround_ppc +#define libc_fesetround libc_fesetround_ppc +#define libc_fesetenvf libc_fesetenv_ppc +#define libc_fesetenv libc_fesetenv_ppc +#define libc_feupdateenv_testf libc_feupdateenv_test_ppc +#define libc_feupdateenv_test libc_feupdateenv_test_ppc +#define libc_feupdateenvf libc_feupdateenv_ppc +#define libc_feupdateenv libc_feupdateenv_ppc +#define libc_feholdsetroundf libc_feholdsetround_ppc +#define libc_feholdsetround libc_feholdsetround_ppc +#define libc_feresetroundf libc_feresetround_ppc +#define libc_feresetround libc_feresetround_ppc + + +/* We have support for rounding mode context. 
*/ +#define HAVE_RM_CTX 1 + +static __always_inline void +libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r) +{ + fenv_union_t old, new; + + old.fenv = fegetenv_register (); + + new.l = (old.l & _FPU_MASK_TRAPS_RN) | r; + + ctx->env = old.fenv; + if (__glibc_unlikely (new.l != old.l)) + { + if ((old.l & _FPU_ALL_TRAPS) != 0) + (void) __fe_mask_env (); + fesetenv_register (new.fenv); + ctx->updated_status = true; + } + else + ctx->updated_status = false; +} + +static __always_inline void +libc_fesetenv_ppc_ctx (struct rm_ctx *ctx) +{ + libc_fesetenv_ppc (&ctx->env); +} + +static __always_inline void +libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx) +{ + if (__glibc_unlikely (ctx->updated_status)) + libc_feresetround_ppc (&ctx->env); +} + +static __always_inline void +libc_feresetround_ppc_ctx (struct rm_ctx *ctx) +{ + if (__glibc_unlikely (ctx->updated_status)) + libc_feresetround_ppc (&ctx->env); +} + +#define libc_fesetenv_ctx libc_fesetenv_ppc_ctx +#define libc_fesetenvf_ctx libc_fesetenv_ppc_ctx +#define libc_fesetenvl_ctx libc_fesetenv_ppc_ctx +#define libc_feholdsetround_ctx libc_feholdsetround_ppc_ctx +#define libc_feholdsetroundf_ctx libc_feholdsetround_ppc_ctx +#define libc_feholdsetroundl_ctx libc_feholdsetround_ppc_ctx +#define libc_feresetround_ctx libc_feresetround_ppc_ctx +#define libc_feresetroundf_ctx libc_feresetround_ppc_ctx +#define libc_feresetroundl_ctx libc_feresetround_ppc_ctx +#define libc_feupdateenv_ctx libc_feupdateenv_ppc_ctx +#define libc_feupdateenvf_ctx libc_feupdateenv_ppc_ctx +#define libc_feupdateenvl_ctx libc_feupdateenv_ppc_ctx + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetenv.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetenv.c new file mode 100644 index 0000000000..7208ab455a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetenv.c @@ -0,0 +1,63 @@ +/* Install given floating-point environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <fpu_control.h> + +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) + +int +__fesetenv (const fenv_t *envp) +{ + fenv_union_t old, new; + + /* get the currently set exceptions. */ + new.fenv = *envp; + old.fenv = fegetenv_register (); + if (old.l == new.l) + return 0; + + /* If the old env has no enabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the + hardware into "precise mode" and may cause the FPU to run slower on some + hardware. */ + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void) __fe_nomask_env_priv (); + + /* If the old env had any enabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. */ + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void)__fe_mask_env (); + + fesetenv_register (*envp); + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetexcept.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetexcept.c new file mode 100644 index 0000000000..47ea8e499d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetexcept.c @@ -0,0 +1,42 @@ +/* Set given exception flags. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fesetexcept (int excepts) +{ + fenv_union_t u, n; + + u.fenv = fegetenv_register (); + n.l = (u.l + | (excepts & FPSCR_STICKY_BITS) + /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ + | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) + & FE_INVALID_SOFTWARE)); + if (n.l != u.l) + { + fesetenv_register (n.fenv); + + /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. 
*/ + if (excepts & FE_INVALID) + feraiseexcept (FE_INVALID); + } + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetmode.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetmode.c new file mode 100644 index 0000000000..794f762898 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetmode.c @@ -0,0 +1,49 @@ +/* Install given floating-point control modes. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <fpu_control.h> + +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \ + | _FPU_MASK_XM | _FPU_MASK_IM) + +#define FPU_STATUS 0xbffff700ULL + +int +fesetmode (const femode_t *modep) +{ + fenv_union_t old, new; + + /* Logic regarding enabled exceptions as in fesetenv. 
*/ + + new.fenv = *modep; + old.fenv = fegetenv_register (); + new.l = (new.l & ~FPU_STATUS) | (old.l & FPU_STATUS); + + if (old.l == new.l) + return 0; + + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void) __fe_nomask_env_priv (); + + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void) __fe_mask_env (); + + fesetenv_register (new.fenv); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fesetround.c b/REORG.TODO/sysdeps/powerpc/fpu/fesetround.c new file mode 100644 index 0000000000..a041f1add9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fesetround.c @@ -0,0 +1,33 @@ +/* Set current rounding direction. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +#undef fesetround +int +__fesetround (int round) +{ + if ((unsigned int) round > 3) + return 1; + else + return __fesetround_inline(round); +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/feupdateenv.c b/REORG.TODO/sysdeps/powerpc/fpu/feupdateenv.c new file mode 100644 index 0000000000..551cc1734d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/feupdateenv.c @@ -0,0 +1,68 @@ +/* Install given floating-point environment and raise exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <fpu_control.h> + +#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM | _FPU_MASK_XM | _FPU_MASK_IM) + +int +__feupdateenv (const fenv_t *envp) +{ + fenv_union_t old, new; + + /* Save the currently set exceptions. */ + new.fenv = *envp; + old.fenv = fegetenv_register (); + + /* Restore rounding mode and exception enable from *envp and merge + exceptions. Leave fraction rounded/inexact and FP result/CC bits + unchanged. 
*/ + new.l = (old.l & 0xffffffff1fffff00LL) | (new.l & 0x1ff80fff); + + /* If the old env has no enabled exceptions and the new env has any enabled + exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put + the hardware into "precise mode" and may cause the FPU to run slower on + some hardware. */ + if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0) + (void) __fe_nomask_env_priv (); + + /* If the old env had any enabled exceptions and the new env has no enabled + exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the + FPU to run faster because it always takes the default action and can not + generate SIGFPE. */ + if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0) + (void)__fe_mask_env (); + + /* Atomically enable and raise (if appropriate) exceptions set in `new'. */ + fesetenv_register (new.fenv); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fgetexcptflg.c b/REORG.TODO/sysdeps/powerpc/fpu/fgetexcptflg.c new file mode 100644 index 0000000000..a11b8f3323 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fgetexcptflg.c @@ -0,0 +1,42 @@ +/* Store current representation for exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + fenv_union_t u; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* Return (all of) it. */ + *flagp = u.l & excepts & FE_ALL_EXCEPT; + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fix-fp-int-compare-invalid.h b/REORG.TODO/sysdeps/powerpc/fpu/fix-fp-int-compare-invalid.h new file mode 100644 index 0000000000..17ac1d2c83 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fix-fp-int-compare-invalid.h @@ -0,0 +1,28 @@ +/* Fix for missing "invalid" exceptions from floating-point + comparisons. PowerPC version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FIX_FP_INT_COMPARE_INVALID_H +#define FIX_FP_INT_COMPARE_INVALID_H 1 + +/* As of GCC 5, comparisons use unordered comparison instructions when + they should use ordered comparisons + <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58684>. */ +#define FIX_COMPARE_INVALID 1 + +#endif /* fix-fp-int-compare-invalid.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fraiseexcpt.c b/REORG.TODO/sysdeps/powerpc/fpu/fraiseexcpt.c new file mode 100644 index 0000000000..05f5cb6309 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fraiseexcpt.c @@ -0,0 +1,68 @@ +/* Raise given exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#undef feraiseexcept +int +__feraiseexcept (int excepts) +{ + fenv_union_t u; + + /* Raise exceptions represented by EXCEPTS. It is the responsibility of + the OS to ensure that if multiple exceptions occur they are fed back + to this process in the proper way; this can happen in hardware, + anyway (in particular, inexact with overflow or underflow). */ + + /* Get the current state. 
*/ + u.fenv = fegetenv_register (); + + /* Add the exceptions */ + u.l = (u.l + | (excepts & FPSCR_STICKY_BITS) + /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ + | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) + & FE_INVALID_SOFTWARE)); + + /* Store the new status word (along with the rest of the environment), + triggering any appropriate exceptions. */ + fesetenv_register (u.fenv); + + if ((excepts & FE_INVALID)) + { + /* For some reason, some PowerPC chips (the 601, in particular) + don't have FE_INVALID_SOFTWARE implemented. Detect this + case and raise FE_INVALID_SNAN instead. */ + u.fenv = fegetenv_register (); + if ((u.l & FE_INVALID) == 0) + set_fpscr_bit (FPSCR_VXSNAN); + } + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feraiseexcept, __old_feraiseexcept) +compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1); +#endif + +libm_hidden_def (__feraiseexcept) +libm_hidden_ver (__feraiseexcept, feraiseexcept) +versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/fsetexcptflg.c b/REORG.TODO/sysdeps/powerpc/fpu/fsetexcptflg.c new file mode 100644 index 0000000000..d5c0963688 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/fsetexcptflg.c @@ -0,0 +1,63 @@ +/* Set floating-point environment exception handling. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + fenv_union_t u, n; + fexcept_t flag; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* Ignore exceptions not listed in 'excepts'. */ + flag = *flagp & excepts; + + /* Replace the exception status */ + int excepts_mask = FPSCR_STICKY_BITS & excepts; + if ((excepts & FE_INVALID) != 0) + excepts_mask |= FE_ALL_INVALID; + n.l = ((u.l & ~excepts_mask) + | (flag & FPSCR_STICKY_BITS) + /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */ + | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT)) + & FE_INVALID_SOFTWARE)); + + /* Store the new status word (along with the rest of the environment). + This may cause floating-point exceptions if the restored state + requests it. */ + if (n.l != u.l) + fesetenv_register (n.fenv); + + /* Deal with FE_INVALID_SOFTWARE not being implemented on some chips. */ + if (flag & FE_INVALID) + feraiseexcept(FE_INVALID); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/fpu/ftestexcept.c b/REORG.TODO/sysdeps/powerpc/fpu/ftestexcept.c new file mode 100644 index 0000000000..8f2ecad509 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/ftestexcept.c @@ -0,0 +1,33 @@ +/* Test exception in current environment. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fetestexcept (int excepts) +{ + fenv_union_t u; + + /* Get the current state. */ + u.fenv = fegetenv_register (); + + /* The FE_INVALID bit is dealt with correctly by the hardware, so we can + just: */ + return u.l & excepts; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/k_cosf.c b/REORG.TODO/sysdeps/powerpc/fpu/k_cosf.c new file mode 100644 index 0000000000..b9e31dc64d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/k_cosf.c @@ -0,0 +1,65 @@ +/* k_cosf.c -- float version of k_cos.c + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <fenv.h> +#include <math_private.h> + +static const float twom27 = 7.4505806e-09; +static const float dot3 = 3.0000001e-01; +static const float dot78125 = 7.8125000e-01; + +static const float one = 1.0000000000e+00; +static const float C1 = 4.1666667908e-02; +static const float C2 = -1.3888889225e-03; +static const float C3 = 2.4801587642e-05; +static const float C4 = -2.7557314297e-07; +static const float C5 = 2.0875723372e-09; +static const float C6 = -1.1359647598e-11; + +float +__kernel_cosf (float x, float y) +{ + float a, hz, z, r, qx; + float ix; + ix = __builtin_fabsf (x); + if (ix < twom27) + { /* |x| < 2**-27 */ + __feraiseexcept (FE_INEXACT); + return one; + } + z = x * x; + r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6))))); + if (ix < dot3) /* if |x| < 0.3 */ + return one - ((float) 0.5 * z - (z * r - x * y)); + else + { + if (ix > dot78125) + { /* x > 0.78125 */ + qx = (float) 0.28125; + } + else + { + qx = ix / 4.0; + } + hz = (float) 0.5 *z - qx; + a = one - qx; + return a - (hz - (z * r - x * y)); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/k_rem_pio2f.c b/REORG.TODO/sysdeps/powerpc/fpu/k_rem_pio2f.c new file mode 100644 index 0000000000..04ed62055a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/k_rem_pio2f.c @@ -0,0 +1,273 @@ +/* k_rem_pio2f.c -- float version of e_rem_pio2.c + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, see <http://www.gnu.org/licenses/>.  */

#include <math.h>

#include <math_private.h>
#include "s_float_bitwise.h"


/* The bits of 2/pi, split into consecutive 8-bit integer chunks, each
   chunk stored exactly in a float (all values are integers <= 255).
   Indexed by how far into the binary expansion of 2/pi the reduction
   needs to look.  */
static const float two_over_pi[] = {
  1.62000000e+02, 2.49000000e+02, 1.31000000e+02, 1.10000000e+02,
  7.80000000e+01, 6.80000000e+01, 2.10000000e+01, 4.10000000e+01,
  2.52000000e+02, 3.90000000e+01, 8.70000000e+01, 2.09000000e+02,
  2.45000000e+02, 5.20000000e+01, 2.21000000e+02, 1.92000000e+02,
  2.19000000e+02, 9.80000000e+01, 1.49000000e+02, 1.53000000e+02,
  6.00000000e+01, 6.70000000e+01, 1.44000000e+02, 6.50000000e+01,
  2.54000000e+02, 8.10000000e+01, 9.90000000e+01, 1.71000000e+02,
  2.22000000e+02, 1.87000000e+02, 1.97000000e+02, 9.70000000e+01,
  1.83000000e+02, 3.60000000e+01, 1.10000000e+02, 5.80000000e+01,
  6.60000000e+01, 7.70000000e+01, 2.10000000e+02, 2.24000000e+02,
  6.00000000e+00, 7.30000000e+01, 4.60000000e+01, 2.34000000e+02,
  9.00000000e+00, 2.09000000e+02, 1.46000000e+02, 2.80000000e+01,
  2.54000000e+02, 2.90000000e+01, 2.35000000e+02, 2.80000000e+01,
  1.77000000e+02, 4.10000000e+01, 1.67000000e+02, 6.20000000e+01,
  2.32000000e+02, 1.30000000e+02, 5.30000000e+01, 2.45000000e+02,
  4.60000000e+01, 1.87000000e+02, 6.80000000e+01, 1.32000000e+02,
  2.33000000e+02, 1.56000000e+02, 1.12000000e+02, 3.80000000e+01,
  1.80000000e+02, 9.50000000e+01, 1.26000000e+02, 6.50000000e+01,
  5.70000000e+01, 1.45000000e+02, 2.14000000e+02, 5.70000000e+01,
  1.31000000e+02, 8.30000000e+01, 5.70000000e+01, 2.44000000e+02,
  1.56000000e+02, 1.32000000e+02, 9.50000000e+01, 1.39000000e+02,
  1.89000000e+02, 2.49000000e+02, 4.00000000e+01, 5.90000000e+01,
  3.10000000e+01, 2.48000000e+02, 1.51000000e+02, 2.55000000e+02,
  2.22000000e+02, 5.00000000e+00, 1.52000000e+02, 1.50000000e+01,
  2.39000000e+02, 4.70000000e+01, 1.70000000e+01, 1.39000000e+02,
  9.00000000e+01, 1.00000000e+01, 1.09000000e+02, 3.10000000e+01,
  1.09000000e+02, 5.40000000e+01, 1.26000000e+02, 2.07000000e+02,
  3.90000000e+01, 2.03000000e+02, 9.00000000e+00, 1.83000000e+02,
  7.90000000e+01, 7.00000000e+01, 6.30000000e+01, 1.02000000e+02,
  1.58000000e+02, 9.50000000e+01, 2.34000000e+02, 4.50000000e+01,
  1.17000000e+02, 3.90000000e+01, 1.86000000e+02, 1.99000000e+02,
  2.35000000e+02, 2.29000000e+02, 2.41000000e+02, 1.23000000e+02,
  6.10000000e+01, 7.00000000e+00, 5.70000000e+01, 2.47000000e+02,
  1.38000000e+02, 8.20000000e+01, 1.46000000e+02, 2.34000000e+02,
  1.07000000e+02, 2.51000000e+02, 9.50000000e+01, 1.77000000e+02,
  3.10000000e+01, 1.41000000e+02, 9.30000000e+01, 8.00000000e+00,
  8.60000000e+01, 3.00000000e+00, 4.80000000e+01, 7.00000000e+01,
  2.52000000e+02, 1.23000000e+02, 1.07000000e+02, 1.71000000e+02,
  2.40000000e+02, 2.07000000e+02, 1.88000000e+02, 3.20000000e+01,
  1.54000000e+02, 2.44000000e+02, 5.40000000e+01, 2.90000000e+01,
  1.69000000e+02, 2.27000000e+02, 1.45000000e+02, 9.70000000e+01,
  9.40000000e+01, 2.30000000e+02, 2.70000000e+01, 8.00000000e+00,
  1.01000000e+02, 1.53000000e+02, 1.33000000e+02, 9.50000000e+01,
  2.00000000e+01, 1.60000000e+02, 1.04000000e+02, 6.40000000e+01,
  1.41000000e+02, 2.55000000e+02, 2.16000000e+02, 1.28000000e+02,
  7.70000000e+01, 1.15000000e+02, 3.90000000e+01, 4.90000000e+01,
  6.00000000e+00, 6.00000000e+00, 2.10000000e+01, 8.60000000e+01,
  2.02000000e+02, 1.15000000e+02, 1.68000000e+02, 2.01000000e+02,
  9.60000000e+01, 2.26000000e+02, 1.23000000e+02, 1.92000000e+02,
  1.40000000e+02, 1.07000000e+02
};


/* pi/2 split into float pieces of decreasing magnitude (first piece is
   1.5703125 ~= pi/2); their sum gives pi/2 to extended precision.  The
   hex comments give each value's single-precision bit pattern.  */
static const float PIo2[] = {
  1.5703125000e+00,		/* 0x3fc90000 */
  4.5776367188e-04,		/* 0x39f00000 */
  2.5987625122e-05,		/* 0x37da0000 */
  7.5437128544e-08,		/* 0x33a20000 */
  6.0026650317e-11,		/* 0x2e840000 */
  7.3896444519e-13,		/* 0x2b500000 */
  5.3845816694e-15,		/* 0x27c20000 */
  5.6378512969e-18,		/* 0x22d00000 */
  8.3009228831e-20,		/* 0x1fc40000 */
  3.2756352257e-22,		/* 0x1bc60000 */
  6.3331015649e-25		/* 0x17440000 */
};


static const float zero = 0.0000000000e+00;
static const float one = 1.0000000000;
static const float twon8 = 3.9062500000e-03;	/* 2^-8: scales down one chunk */
static const float two8 = 2.5600000000e+02;	/* 2^8: one 8-bit chunk */


/* Kernel of the Payne-Hanek-style reduction of a float argument modulo
   pi/2, following the fdlibm __kernel_rem_pio2 scheme with 8-bit chunks
   (the double version uses 24-bit chunks).

   X points to NX float "digits" of the input, E0 carries the scaling:
   the exponent extracted from it via __float_get_exp determines where
   in two_over_pi[] the multiplication starts.  On return, Y[0] + Y[1]
   is the reduced argument (head and tail), and the return value is the
   octant count n mod 8.  When the fractional part exceeds 1/2 (ih set),
   the complement is used and both y values are negated.

   NOTE(review): array bounds (iq[20], f[20], q[20]) assume callers pass
   nx small enough that jx + jk and any recompute growth stay below 20,
   as in the fdlibm original — callers elsewhere enforce this.  */
int32_t
__fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx)
{
  int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih, exp;
  float z, fw, f[20], fq[20], q[20];

  /* initialize jk: number of two_over_pi terms / PIo2 pieces used.  */
  jp = jk = 9;

  /* determine jx,jv,q0, note that 3>q0 */
  jx = nx - 1;
  exp = __float_get_exp (e0) - 127;	/* unbiased exponent of the scale */
  jv = (exp - 3) / 8;
  if (jv < 0)
    jv = 0;
  q0 = exp - 8 * (jv + 1);	/* exponent of q[0]'s chunk position */

  /* set up f[0] to f[jx+jk] where f[jx+jk] = two_over_pi[jv+jk] */
  j = jv - jx;
  m = jx + jk;
  for (i = 0; i <= m; i++, j++)
    f[i] = (j < 0) ? zero : two_over_pi[j];

  /* compute q[0],q[1],...q[jk]: partial products of x[] and the
     selected window of 2/pi chunks.  */
  for (i = 0; i <= jk; i++)
    {
      for (j = 0, fw = 0.0; j <= jx; j++)
	fw += x[j] * f[jx + i - j];
      q[i] = fw;
    }

  jz = jk;
recompute:
  /* distill q[] into iq[] reversingly: normalize each partial product
     into an 8-bit integer digit, carrying the overflow downward.  */
  for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
    {
      fw = __truncf (twon8 * z);
      iq[i] = (int32_t) (z - two8 * fw);
      z = q[j - 1] + fw;
    }

  /* compute n */
  z = __scalbnf (z, q0);	/* actual value of z */
  z -= 8.0 * __floorf (z * 0.125);	/* trim off integer >= 8 */
  n = (int32_t) z;
  z -= __truncf (z);
  ih = 0;
  if (q0 > 0)
    {				/* need iq[jz-1] to determine n */
      i = (iq[jz - 1] >> (8 - q0));
      n += i;
      iq[jz - 1] -= i << (8 - q0);
      ih = iq[jz - 1] >> (7 - q0);
    }
  else if (q0 == 0)
    ih = iq[jz - 1] >> 7;
  else if (z >= 0.5)
    ih = 2;

  if (ih > 0)
    {				/* q > 0.5: use 1-q and bump the octant */
      n += 1;
      carry = 0;
      for (i = 0; i < jz; i++)
	{			/* compute 1-q */
	  j = iq[i];
	  if (carry == 0)
	    {
	      if (j != 0)
		{
		  carry = 1;
		  iq[i] = 0x100 - j;
		}
	    }
	  else
	    iq[i] = 0xff - j;
	}
      if (q0 > 0)
	{			/* rare case: chance is 1 in 12 */
	  switch (q0)
	    {
	    case 1:
	      iq[jz - 1] &= 0x7f;
	      break;
	    case 2:
	      iq[jz - 1] &= 0x3f;
	      break;
	    }
	}
      if (ih == 2)
	{
	  z = one - z;
	  if (carry != 0)
	    z -= __scalbnf (one, q0);
	}
    }

  /* check if recomputation is needed: if everything cancelled to zero
     we must pull in more chunks of 2/pi for sufficient precision.  */
  if (z == zero)
    {
      j = 0;
      for (i = jz - 1; i >= jk; i--)
	j |= iq[i];
      if (j == 0)
	{			/* need recomputation */
	  for (k = 1; iq[jk - k] == 0; k++);	/* k = no. of terms needed */

	  for (i = jz + 1; i <= jz + k; i++)
	    {			/* add q[jz+1] to q[jz+k] */
	      f[jx + i] = two_over_pi[jv + i];
	      for (j = 0, fw = 0.0; j <= jx; j++)
		fw += x[j] * f[jx + i - j];
	      q[i] = fw;
	    }
	  jz += k;
	  goto recompute;
	}
    }

  /* chop off zero terms */
  if (z == 0.0)
    {
      jz -= 1;
      q0 -= 8;
      while (iq[jz] == 0)
	{
	  jz--;
	  q0 -= 8;
	}
    }
  else
    {				/* break z into 8-bit if necessary */
      z = __scalbnf (z, -q0);
      if (z >= two8)
	{
	  fw = __truncf (twon8 * z);
	  iq[jz] = (int32_t) (z - two8 * fw);
	  jz += 1;
	  q0 += 8;
	  iq[jz] = (int32_t) fw;
	}
      else
	iq[jz] = (int32_t) z;
    }

  /* convert integer "bit" chunk to floating-point value */
  fw = __scalbnf (one, q0);
  for (i = jz; i >= 0; i--)
    {
      q[i] = fw * (float) iq[i];
      fw *= twon8;
    }

  /* compute PIo2[0,...,jp]*q[jz,...,0] */
  for (i = jz; i >= 0; i--)
    {
      for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
	fw += PIo2[k] * q[i + k];
      fq[jz - i] = fw;
    }

  /* compress fq[] into y[]: y[0] is the rounded sum, y[1] the residual
     tail; both negated when the complement (ih != 0) was taken.  */
  fw = 0.0;
  for (i = jz; i >= 0; i--)
    fw += fq[i];
  y[0] = (ih == 0) ? fw : -fw;
  fw = fq[0] - fw;
  for (i = 1; i <= jz; i++)
    fw += fq[i];
  y[1] = (ih == 0) ? fw : -fw;

  return n & 7;
}
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <float.h> +#include <math.h> +#include <fenv.h> +#include <math_private.h> + + +static const float twom27 = 7.4505806000e-09; +static const float half = 5.0000000000e-01; +static const float S1 = -1.6666667163e-01; +static const float S2 = 8.3333337680e-03; +static const float S3 = -1.9841270114e-04; +static const float S4 = 2.7557314297e-06; +static const float S5 = -2.5050759689e-08; +static const float S6 = 1.5896910177e-10; + + +float +__kernel_sinf (float x, float y, int iy) +{ + float z, r, v; + float ix; + ix = __builtin_fabsf (x); + if (ix < twom27) + { /* |x| < 2**-27 */ + if (ix < FLT_MIN && ix != 0.0f) + __feraiseexcept (FE_UNDERFLOW|FE_INEXACT); + else + __feraiseexcept (FE_INEXACT); + return x; + } + z = x * x; + v = z * x; + r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6))); + if (iy == 0) + return x + v * (S1 + z * r); + else + return x - ((z * (half * y - v * r) - y) - v * S1); +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps new file mode 100644 index 0000000000..72eb2b1e5a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps @@ -0,0 +1,2342 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_upward": +double: 1 +float: 1 +idouble: 1 
+ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "acosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "acosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 4 + +Function: "asin": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "asinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "asinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: "atan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan2": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "atan2_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "atan2_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "atan2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "atan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "atan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 
+ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "atanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cabs": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_upward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cacos": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacos_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cacos_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cacos_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 13 +ldouble: 13 + +Function: Real part of "cacosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacosh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + 
+Function: Real part of "cacosh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "cacosh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "cacosh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 8 +ldouble: 8 + +Function: "carg": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "carg_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "carg_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "carg_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "casin": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casin_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "casin_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casin_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "casin_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 13 +ldouble: 13 + +Function: Real part of "casinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part 
of "casinh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "casinh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casinh_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "casinh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catan_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "catan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 +idouble: 1 
+ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catanh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "catanh_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: "cbrt_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cbrt_upward": +double: 5 +float: 1 +idouble: 5 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 
+ +Function: Real part of "ccosh_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccosh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccosh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cexp_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "cexp_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "cexp_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "clog": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 
+ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10_downward": +double: 6 +float: 6 +idouble: 6 +ifloat: 6 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog10_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 9 +ldouble: 9 + +Function: Imaginary part of "clog10_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Real part of "clog10_upward": +double: 8 +float: 5 +idouble: 8 +ifloat: 5 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog10_upward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_downward": +double: 7 +float: 5 +idouble: 7 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "clog_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Real part of "clog_towardzero": +double: 7 +float: 5 +idouble: 7 +ifloat: 5 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_upward": +double: 8 +float: 5 +idouble: 8 +ifloat: 5 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "clog_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "cos": +float: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cos_downward": +double: 1 +float: 4 +idouble: 1 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "cos_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cos_upward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cosh_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: 
"cosh_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow": +double: 2 +float: 5 +idouble: 2 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow_downward": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cpow_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_towardzero": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cpow_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cpow_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin": +ildouble: 1 +ldouble: 1 + +Function: Real part of "csin_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + 
+Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 +idouble: 6 
+ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ctan_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ctan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Real part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Real part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ctanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 2 +idouble: 5 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: Real part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "erfc": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 
3 + +Function: "erfc_downward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 10 +ldouble: 10 + +Function: "erfc_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 9 +ldouble: 9 + +Function: "erfc_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "exp": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "exp10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp2_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_downward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_towardzero": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "expm1_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "fma": +ildouble: 1 +ldouble: 1 + +Function: "fma_downward": +ildouble: 1 +ldouble: 1 + +Function: "fma_towardzero": +ildouble: 2 +ldouble: 2 + +Function: "fma_upward": +ildouble: 3 +ldouble: 3 + 
+Function: "fmod": +ildouble: 1 +ldouble: 1 + +Function: "fmod_downward": +ildouble: 1 +ldouble: 1 + +Function: "fmod_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "fmod_upward": +ildouble: 1 +ldouble: 1 + +Function: "gamma": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "gamma_downward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "gamma_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 16 +ldouble: 16 + +Function: "gamma_upward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "hypot": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_downward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_towardzero": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_upward": +double: 1 +idouble: 1 +ildouble: 3 +ldouble: 3 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: "j0_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 8 +ldouble: 8 + +Function: "j0_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "j1": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "j1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "j1_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 6 +ldouble: 6 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "jn_downward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: "jn_towardzero": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 7 +ldouble: 7 + +Function: "jn_upward": +double: 5 
+float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "lgamma": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "lgamma_downward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "lgamma_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 16 +ldouble: 16 + +Function: "lgamma_upward": +double: 4 +float: 5 +idouble: 4 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log10_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log10_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log1p_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log1p_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log1p_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "log2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log2_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "log2_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "log2_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "log_downward": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log_towardzero": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "nextafter_downward": +ildouble: 1 +ldouble: 
1 + +Function: "nextafter_upward": +ildouble: 1 +ldouble: 1 + +Function: "pow": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "pow10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "pow_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "sin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "sincos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sincos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sincos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 7 +ldouble: 7 + +Function: "sincos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "sinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sqrt": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_downward": +ildouble: 1 
+ldouble: 1 + +Function: "sqrt_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_upward": +ildouble: 1 +ldouble: 1 + +Function: "tan": +float: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "tan_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "tan_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "tan_upward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "tanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "tanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "tgamma": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "tgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "tgamma_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "y0_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "y0_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "y0_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 9 +ldouble: 9 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "y1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 9 +ldouble: 9 + +Function: "y1_upward": +double: 5 +float: 2 +idouble: 5 +ifloat: 2 
+ildouble: 9 +ldouble: 9 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "yn_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "yn_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "yn_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 9 +ldouble: 9 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps-name b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps-name new file mode 100644 index 0000000000..8c5f7fa2ab --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/libm-test-ulps-name @@ -0,0 +1 @@ +PowerPC diff --git a/REORG.TODO/sysdeps/powerpc/fpu/math_ldbl.h b/REORG.TODO/sysdeps/powerpc/fpu/math_ldbl.h new file mode 100644 index 0000000000..05f51217bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/math_ldbl.h @@ -0,0 +1,55 @@ +/* Manipulation of the bit representation of 'long double' quantities. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_LDBL_H_PPC_ +#define _MATH_LDBL_H_PPC_ 1 + +/* GCC does not optimize the default ldbl_pack code to not spill register + in the stack. 
The following optimization tells gcc that pack/unpack + is really a nop. We use fr1/fr2 because those are the regs used to + pass/return a single long double arg. */ +static inline long double +ldbl_pack_ppc (double a, double aa) +{ + register long double x __asm__ ("fr1"); + register double xh __asm__ ("fr1"); + register double xl __asm__ ("fr2"); + xh = a; + xl = aa; + __asm__ ("" : "=f" (x) : "f" (xh), "f" (xl)); + return x; +} + +static inline void +ldbl_unpack_ppc (long double l, double *a, double *aa) +{ + register long double x __asm__ ("fr1"); + register double xh __asm__ ("fr1"); + register double xl __asm__ ("fr2"); + x = l; + __asm__ ("" : "=f" (xh), "=f" (xl) : "f" (x)); + *a = xh; + *aa = xl; +} + +#define ldbl_pack ldbl_pack_ppc +#define ldbl_unpack ldbl_unpack_ppc + +#include <sysdeps/ieee754/ldbl-128ibm/math_ldbl.h> + +#endif /* math_ldbl.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/math_private.h b/REORG.TODO/sysdeps/powerpc/fpu/math_private.h new file mode 100644 index 0000000000..3c71275392 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/math_private.h @@ -0,0 +1,142 @@ +/* Private inline math functions for powerpc. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _PPC_MATH_PRIVATE_H_ +#define _PPC_MATH_PRIVATE_H_ + +#include <sysdep.h> +#include <ldsodefs.h> +#include <dl-procinfo.h> +#include <fenv_private.h> +#include_next <math_private.h> + +extern double __slow_ieee754_sqrt (double); +extern __always_inline double +__ieee754_sqrt (double __x) +{ + double __z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrt %0,%1" : "=f" (__z) : "f" (__x)); +#else + __z = __slow_ieee754_sqrt(__x); +#endif + + return __z; +} + +extern float __slow_ieee754_sqrtf (float); +extern __always_inline float +__ieee754_sqrtf (float __x) +{ + float __z; + +#ifdef _ARCH_PPCSQ + asm ("fsqrts %0,%1" : "=f" (__z) : "f" (__x)); +#else + __z = __slow_ieee754_sqrtf(__x); +#endif + + return __z; +} + +#if defined _ARCH_PWR5X + +# ifndef __round +# define __round(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frin %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __roundf +# define __roundf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frin %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __trunc +# define __trunc(x) \ + ({ double __z; \ + __asm __volatile ( \ + " friz %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __truncf +# define __truncf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " friz %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __ceil +# define __ceil(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frip %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __ceilf +# define __ceilf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frip %0,%1\n" \ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +# ifndef __floor +# define __floor(x) \ + ({ double __z; \ + __asm __volatile ( \ + " frim %0,%1\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif +# ifndef __floorf +# define __floorf(x) \ + ({ float __z; \ + __asm __volatile ( \ + " frim %0,%1\n" 
\ + " frsp %0,%0\n" \ + : "=f" (__z) \ + : "f" (x)); \ + __z; }) +# endif + +#endif /* defined _ARCH_PWR5X */ + +#endif /* _PPC_MATH_PRIVATE_H_ */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_cosf.c b/REORG.TODO/sysdeps/powerpc/fpu/s_cosf.c new file mode 100644 index 0000000000..772b138ac3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_cosf.c @@ -0,0 +1,69 @@ +/* s_cosf.c -- float version of s_cos.c. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <errno.h> +#include <math.h> +#include <math_private.h> + +static const float pio4 = 7.8539801e-1; + +float +__cosf (float x) +{ + float y[2], z = 0.0; + float ix; + int32_t n; + + ix = __builtin_fabsf (x); + + /* |x| ~< pi/4 */ + if (ix <= pio4) + { + return __kernel_cosf (x, z); + /* cos(Inf or NaN) is NaN */ + } + else if (isnanf (ix)) + { + return x - x; + } + else if (isinff (ix)) + { + __set_errno (EDOM); + return x - x; + } + + /* argument reduction needed */ + else + { + n = __ieee754_rem_pio2f (x, y); + switch (n & 3) + { + case 0: + return __kernel_cosf (y[0], y[1]); + case 1: + return -__kernel_sinf (y[0], y[1], 1); + case 2: + return -__kernel_cosf (y[0], y[1]); + default: + return __kernel_sinf (y[0], y[1], 1); + } + } +} + +weak_alias (__cosf, cosf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fabs.S b/REORG.TODO/sysdeps/powerpc/fpu/s_fabs.S new file mode 100644 index 0000000000..87dc82eb28 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fabs.S @@ -0,0 +1,36 @@ +/* Floating-point absolute value. PowerPC version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +ENTRY(__fabs) +/* double [f1] fabs (double [f1] x); */ + fabs fp1,fp1 + blr +END(__fabs) + +weak_alias (__fabs,fabs) + +/* It turns out that it's safe to use this code even for single-precision. */ +strong_alias(__fabs,__fabsf) +weak_alias (__fabs,fabsf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__fabs,__fabsl) +weak_alias (__fabs,fabsl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fabsf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_fabsf.S new file mode 100644 index 0000000000..877c710ce8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fabsf.S @@ -0,0 +1 @@ +/* __fabsf is in s_fabs.S */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_float_bitwise.h b/REORG.TODO/sysdeps/powerpc/fpu/s_float_bitwise.h new file mode 100644 index 0000000000..8e63fb253b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_float_bitwise.h @@ -0,0 +1,115 @@ +/* Bitwise manipulation over float. Function prototypes. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _FLOAT_BITWISE_ +#define _FLOAT_BITWISE_ 1 + +#include <math_private.h> + +/* Returns (int)(num & 0x7FFFFFF0 == value) */ +static inline int +__float_and_test28 (float num, float value) +{ + float ret; +#ifdef _ARCH_PWR7 + union { + int i; + float f; + } mask = { .i = 0x7ffffff0 }; + __asm__ ( + /* the 'f' constraint is used on mask because we just need + * to compare floats, not full vector */ + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); +#else + int32_t inum; + GET_FLOAT_WORD(inum, num); + inum = (inum & 0x7ffffff0); + SET_FLOAT_WORD(ret, inum); +#endif + return (ret == value); +} + +/* Returns (int)(num & 0x7FFFFF00 == value) */ +static inline int +__float_and_test24 (float num, float value) +{ + float ret; +#ifdef _ARCH_PWR7 + union { + int i; + float f; + } mask = { .i = 0x7fffff00 }; + __asm__ ( + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); +#else + int32_t inum; + GET_FLOAT_WORD(inum, num); + inum = (inum & 0x7fffff00); + SET_FLOAT_WORD(ret, inum); +#endif + return (ret == value); +} + +/* Returns (float)(num & 0x7F800000) */ +static inline float +__float_and8 (float num) +{ + float ret; +#ifdef _ARCH_PWR7 + union { + int i; + float f; + } mask = { .i = 0x7f800000 }; + __asm__ ( + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); +#else + int32_t inum; + GET_FLOAT_WORD(inum, num); + inum = (inum & 0x7f800000); + SET_FLOAT_WORD(ret, inum); +#endif + return ret; +} + +/* Returns ((int32_t)(num & 0x7F800000) >> 23) */ +static inline int32_t +__float_get_exp (float num) +{ + int32_t inum; +#ifdef _ARCH_PWR7 + float ret; + union { + int i; + float f; + } mask = { .i = 0x7f800000 }; + __asm__ ( + "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f) + ); + GET_FLOAT_WORD(inum, ret); +#else + GET_FLOAT_WORD(inum, num); + inum = inum & 0x7f800000; +#endif + return inum >> 23; +} + +#endif /* s_float_bitwise.h */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fma.S 
b/REORG.TODO/sysdeps/powerpc/fpu/s_fma.S new file mode 100644 index 0000000000..e101f374bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fma.S @@ -0,0 +1,32 @@ +/* Compute x * y + z as ternary operation. PowerPC version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__fma) +/* double [f1] fma (double [f1] x, double [f2] y, double [f3] z); */ + fmadd fp1,fp1,fp2,fp3 + blr +END(__fma) + +weak_alias (__fma,fma) + +#ifdef NO_LONG_DOUBLE +weak_alias (__fma,__fmal) +weak_alias (__fma,fmal) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_fmaf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_fmaf.S new file mode 100644 index 0000000000..49ea298707 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_fmaf.S @@ -0,0 +1,27 @@ +/* Compute x * y + z as ternary operation. PowerPC version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__fmaf) +/* float [f1] fmaf (float [f1] x, float [f2] y, float [f3] z); */ + fmadds fp1,fp1,fp2,fp3 + blr +END(__fmaf) + +weak_alias (__fmaf,fmaf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_isnan.c b/REORG.TODO/sysdeps/powerpc/fpu/s_isnan.c new file mode 100644 index 0000000000..f75391fa80 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_isnan.c @@ -0,0 +1,62 @@ +/* Return 1 if argument is a NaN, else 0. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Ugly kludge to avoid declarations. */ +#define __isnanf __Xisnanf +#define isnanf Xisnanf +#define __GI___isnanf __GI___Xisnanf + +#include <math.h> +#include <math_ldbl_opt.h> +#include <fenv_libc.h> + +#undef __isnanf +#undef isnanf +#undef __GI___isnanf + + +/* The hidden_proto in include/math.h was obscured by the macro hackery. 
*/ +__typeof (__isnan) __isnanf; +hidden_proto (__isnanf) + + +int +__isnan (double x) +{ + fenv_t savedstate; + int result; + savedstate = fegetenv_register (); + reset_fpscr_bit (FPSCR_VE); + result = !(x == x); + fesetenv_register (savedstate); + return result; +} +hidden_def (__isnan) +weak_alias (__isnan, isnan) + + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_isnanf.S new file mode 100644 index 0000000000..fc22f678a1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* __isnanf is in s_isnan.c */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_lrintf.S b/REORG.TODO/sysdeps/powerpc/fpu/s_lrintf.S new file mode 100644 index 0000000000..e24766535f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_lrintf.S @@ -0,0 +1 @@ +/* __lrintf is in s_lrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_rint.c b/REORG.TODO/sysdeps/powerpc/fpu/s_rint.c new file mode 100644 index 0000000000..a96140b2c9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_rint.c @@ -0,0 +1,46 @@ +/* Round a 64-bit floating point value to the nearest integer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +double +__rint (double x) +{ + static const float TWO52 = 4503599627370496.0; + + if (fabs (x) < TWO52) + { + if (x > 0.0) + { + x += TWO52; + x -= TWO52; + } + else if (x < 0.0) + { + x = TWO52 - x; + x = -(x - TWO52); + } + } + + return x; +} +weak_alias (__rint, rint) +#ifdef NO_LONG_DOUBLE +strong_alias (__rint, __rintl) +weak_alias (__rint, rintl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_rintf.c b/REORG.TODO/sysdeps/powerpc/fpu/s_rintf.c new file mode 100644 index 0000000000..6b16c7bec4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_rintf.c @@ -0,0 +1,42 @@ +/* Round a 32-bit floating point value to the nearest integer. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +float +__rintf (float x) +{ + static const float TWO23 = 8388608.0; + + if (fabsf (x) < TWO23) + { + if (x > 0.0) + { + x += TWO23; + x -= TWO23; + } + else if (x < 0.0) + { + x = TWO23 - x; + x = -(x - TWO23); + } + } + + return x; +} +weak_alias (__rintf, rintf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/s_sinf.c b/REORG.TODO/sysdeps/powerpc/fpu/s_sinf.c new file mode 100644 index 0000000000..54a428e68a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/s_sinf.c @@ -0,0 +1,69 @@ +/* s_sinf.c -- float version of s_sin.c. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <errno.h> +#include <math.h> +#include <math_private.h> + +static const float pio4 = 7.8539801e-1; + +float +__sinf (float x) +{ + float y[2], z = 0.0; + float ix; + int32_t n; + + ix = __builtin_fabsf (x); + + /* |x| ~< pi/4 */ + if (ix <= pio4) + { + return __kernel_sinf (x, z, 0); + /* sin(Inf or NaN) is NaN */ + } + else if (isnanf (ix)) + { + return x - x; + } + else if (isinff (ix)) + { + __set_errno (EDOM); + return x - x; + } + + /* argument reduction needed */ + else + { + n = __ieee754_rem_pio2f (x, y); + switch (n & 3) + { + case 0: + return __kernel_sinf (y[0], y[1], 1); + case 1: + return __kernel_cosf (y[0], y[1]); + case 2: + return -__kernel_sinf (y[0], y[1], 1); + default: + return -__kernel_cosf (y[0], y[1]); + } + } +} + +weak_alias (__sinf, sinf) diff --git a/REORG.TODO/sysdeps/powerpc/fpu/t_sqrt.c b/REORG.TODO/sysdeps/powerpc/fpu/t_sqrt.c new file mode 100644 index 0000000000..9ed7436ae6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/t_sqrt.c @@ -0,0 +1,144 @@ +const float __t_sqrt[1024] = { +0.7078,0.7064, 0.7092,0.7050, 0.7106,0.7037, 0.7119,0.7023, 0.7133,0.7010, +0.7147,0.6996, 0.7160,0.6983, 0.7174,0.6970, 0.7187,0.6957, 0.7201,0.6943, +0.7215,0.6930, 0.7228,0.6917, 0.7242,0.6905, 0.7255,0.6892, 0.7269,0.6879, +0.7282,0.6866, 0.7295,0.6854, 0.7309,0.6841, 0.7322,0.6829, 0.7335,0.6816, +0.7349,0.6804, 0.7362,0.6792, 0.7375,0.6779, 0.7388,0.6767, 0.7402,0.6755, +0.7415,0.6743, 0.7428,0.6731, 0.7441,0.6719, 0.7454,0.6708, 0.7467,0.6696, +0.7480,0.6684, 0.7493,0.6672, 0.7507,0.6661, 0.7520,0.6649, 0.7532,0.6638, +0.7545,0.6627, 0.7558,0.6615, 0.7571,0.6604, 0.7584,0.6593, 0.7597,0.6582, +0.7610,0.6570, 0.7623,0.6559, 0.7635,0.6548, 0.7648,0.6537, 0.7661,0.6527, +0.7674,0.6516, 0.7686,0.6505, 0.7699,0.6494, 0.7712,0.6484, 0.7725,0.6473, +0.7737,0.6462, 0.7750,0.6452, 0.7762,0.6441, 0.7775,0.6431, 0.7787,0.6421, +0.7800,0.6410, 0.7812,0.6400, 0.7825,0.6390, 0.7837,0.6380, 0.7850,0.6370, +0.7862,0.6359, 0.7875,0.6349, 
0.7887,0.6339, 0.7900,0.6330, 0.7912,0.6320, +0.7924,0.6310, 0.7937,0.6300, 0.7949,0.6290, 0.7961,0.6281, 0.7973,0.6271, +0.7986,0.6261, 0.7998,0.6252, 0.8010,0.6242, 0.8022,0.6233, 0.8034,0.6223, +0.8046,0.6214, 0.8059,0.6205, 0.8071,0.6195, 0.8083,0.6186, 0.8095,0.6177, +0.8107,0.6168, 0.8119,0.6158, 0.8131,0.6149, 0.8143,0.6140, 0.8155,0.6131, +0.8167,0.6122, 0.8179,0.6113, 0.8191,0.6104, 0.8203,0.6096, 0.8215,0.6087, +0.8227,0.6078, 0.8238,0.6069, 0.8250,0.6060, 0.8262,0.6052, 0.8274,0.6043, +0.8286,0.6035, 0.8297,0.6026, 0.8309,0.6017, 0.8321,0.6009, 0.8333,0.6000, +0.8344,0.5992, 0.8356,0.5984, 0.8368,0.5975, 0.8379,0.5967, 0.8391,0.5959, +0.8403,0.5950, 0.8414,0.5942, 0.8426,0.5934, 0.8437,0.5926, 0.8449,0.5918, +0.8461,0.5910, 0.8472,0.5902, 0.8484,0.5894, 0.8495,0.5886, 0.8507,0.5878, +0.8518,0.5870, 0.8530,0.5862, 0.8541,0.5854, 0.8552,0.5846, 0.8564,0.5838, +0.8575,0.5831, 0.8587,0.5823, 0.8598,0.5815, 0.8609,0.5808, 0.8621,0.5800, +0.8632,0.5792, 0.8643,0.5785, 0.8655,0.5777, 0.8666,0.5770, 0.8677,0.5762, +0.8688,0.5755, 0.8700,0.5747, 0.8711,0.5740, 0.8722,0.5733, 0.8733,0.5725, +0.8744,0.5718, 0.8756,0.5711, 0.8767,0.5703, 0.8778,0.5696, 0.8789,0.5689, +0.8800,0.5682, 0.8811,0.5675, 0.8822,0.5667, 0.8833,0.5660, 0.8844,0.5653, +0.8855,0.5646, 0.8866,0.5639, 0.8877,0.5632, 0.8888,0.5625, 0.8899,0.5618, +0.8910,0.5611, 0.8921,0.5605, 0.8932,0.5598, 0.8943,0.5591, 0.8954,0.5584, +0.8965,0.5577, 0.8976,0.5570, 0.8987,0.5564, 0.8998,0.5557, 0.9008,0.5550, +0.9019,0.5544, 0.9030,0.5537, 0.9041,0.5530, 0.9052,0.5524, 0.9062,0.5517, +0.9073,0.5511, 0.9084,0.5504, 0.9095,0.5498, 0.9105,0.5491, 0.9116,0.5485, +0.9127,0.5478, 0.9138,0.5472, 0.9148,0.5465, 0.9159,0.5459, 0.9170,0.5453, +0.9180,0.5446, 0.9191,0.5440, 0.9202,0.5434, 0.9212,0.5428, 0.9223,0.5421, +0.9233,0.5415, 0.9244,0.5409, 0.9254,0.5403, 0.9265,0.5397, 0.9276,0.5391, +0.9286,0.5384, 0.9297,0.5378, 0.9307,0.5372, 0.9318,0.5366, 0.9328,0.5360, +0.9338,0.5354, 0.9349,0.5348, 0.9359,0.5342, 
0.9370,0.5336, 0.9380,0.5330, +0.9391,0.5324, 0.9401,0.5319, 0.9411,0.5313, 0.9422,0.5307, 0.9432,0.5301, +0.9442,0.5295, 0.9453,0.5289, 0.9463,0.5284, 0.9473,0.5278, 0.9484,0.5272, +0.9494,0.5266, 0.9504,0.5261, 0.9515,0.5255, 0.9525,0.5249, 0.9535,0.5244, +0.9545,0.5238, 0.9556,0.5233, 0.9566,0.5227, 0.9576,0.5221, 0.9586,0.5216, +0.9596,0.5210, 0.9607,0.5205, 0.9617,0.5199, 0.9627,0.5194, 0.9637,0.5188, +0.9647,0.5183, 0.9657,0.5177, 0.9667,0.5172, 0.9677,0.5167, 0.9687,0.5161, +0.9698,0.5156, 0.9708,0.5151, 0.9718,0.5145, 0.9728,0.5140, 0.9738,0.5135, +0.9748,0.5129, 0.9758,0.5124, 0.9768,0.5119, 0.9778,0.5114, 0.9788,0.5108, +0.9798,0.5103, 0.9808,0.5098, 0.9818,0.5093, 0.9828,0.5088, 0.9838,0.5083, +0.9847,0.5077, 0.9857,0.5072, 0.9867,0.5067, 0.9877,0.5062, 0.9887,0.5057, +0.9897,0.5052, 0.9907,0.5047, 0.9917,0.5042, 0.9926,0.5037, 0.9936,0.5032, +0.9946,0.5027, 0.9956,0.5022, 0.9966,0.5017, 0.9976,0.5012, 0.9985,0.5007, +0.9995,0.5002, +1.0010,0.4995, 1.0029,0.4985, 1.0049,0.4976, 1.0068,0.4966, 1.0088,0.4957, +1.0107,0.4947, 1.0126,0.4938, 1.0145,0.4928, 1.0165,0.4919, 1.0184,0.4910, +1.0203,0.4901, 1.0222,0.4891, 1.0241,0.4882, 1.0260,0.4873, 1.0279,0.4864, +1.0298,0.4855, 1.0317,0.4846, 1.0336,0.4837, 1.0355,0.4829, 1.0374,0.4820, +1.0393,0.4811, 1.0411,0.4802, 1.0430,0.4794, 1.0449,0.4785, 1.0468,0.4777, +1.0486,0.4768, 1.0505,0.4760, 1.0523,0.4751, 1.0542,0.4743, 1.0560,0.4735, +1.0579,0.4726, 1.0597,0.4718, 1.0616,0.4710, 1.0634,0.4702, 1.0653,0.4694, +1.0671,0.4686, 1.0689,0.4678, 1.0707,0.4670, 1.0726,0.4662, 1.0744,0.4654, +1.0762,0.4646, 1.0780,0.4638, 1.0798,0.4630, 1.0816,0.4623, 1.0834,0.4615, +1.0852,0.4607, 1.0870,0.4600, 1.0888,0.4592, 1.0906,0.4585, 1.0924,0.4577, +1.0942,0.4570, 1.0960,0.4562, 1.0978,0.4555, 1.0995,0.4547, 1.1013,0.4540, +1.1031,0.4533, 1.1049,0.4525, 1.1066,0.4518, 1.1084,0.4511, 1.1101,0.4504, +1.1119,0.4497, 1.1137,0.4490, 1.1154,0.4483, 1.1172,0.4476, 1.1189,0.4469, +1.1207,0.4462, 1.1224,0.4455, 1.1241,0.4448, 
1.1259,0.4441, 1.1276,0.4434, +1.1293,0.4427, 1.1311,0.4421, 1.1328,0.4414, 1.1345,0.4407, 1.1362,0.4401, +1.1379,0.4394, 1.1397,0.4387, 1.1414,0.4381, 1.1431,0.4374, 1.1448,0.4368, +1.1465,0.4361, 1.1482,0.4355, 1.1499,0.4348, 1.1516,0.4342, 1.1533,0.4335, +1.1550,0.4329, 1.1567,0.4323, 1.1584,0.4316, 1.1600,0.4310, 1.1617,0.4304, +1.1634,0.4298, 1.1651,0.4292, 1.1668,0.4285, 1.1684,0.4279, 1.1701,0.4273, +1.1718,0.4267, 1.1734,0.4261, 1.1751,0.4255, 1.1768,0.4249, 1.1784,0.4243, +1.1801,0.4237, 1.1817,0.4231, 1.1834,0.4225, 1.1850,0.4219, 1.1867,0.4213, +1.1883,0.4208, 1.1900,0.4202, 1.1916,0.4196, 1.1932,0.4190, 1.1949,0.4185, +1.1965,0.4179, 1.1981,0.4173, 1.1998,0.4167, 1.2014,0.4162, 1.2030,0.4156, +1.2046,0.4151, 1.2063,0.4145, 1.2079,0.4139, 1.2095,0.4134, 1.2111,0.4128, +1.2127,0.4123, 1.2143,0.4117, 1.2159,0.4112, 1.2175,0.4107, 1.2192,0.4101, +1.2208,0.4096, 1.2224,0.4090, 1.2239,0.4085, 1.2255,0.4080, 1.2271,0.4075, +1.2287,0.4069, 1.2303,0.4064, 1.2319,0.4059, 1.2335,0.4054, 1.2351,0.4048, +1.2366,0.4043, 1.2382,0.4038, 1.2398,0.4033, 1.2414,0.4028, 1.2429,0.4023, +1.2445,0.4018, 1.2461,0.4013, 1.2477,0.4008, 1.2492,0.4003, 1.2508,0.3998, +1.2523,0.3993, 1.2539,0.3988, 1.2555,0.3983, 1.2570,0.3978, 1.2586,0.3973, +1.2601,0.3968, 1.2617,0.3963, 1.2632,0.3958, 1.2648,0.3953, 1.2663,0.3949, +1.2678,0.3944, 1.2694,0.3939, 1.2709,0.3934, 1.2725,0.3929, 1.2740,0.3925, +1.2755,0.3920, 1.2771,0.3915, 1.2786,0.3911, 1.2801,0.3906, 1.2816,0.3901, +1.2832,0.3897, 1.2847,0.3892, 1.2862,0.3887, 1.2877,0.3883, 1.2892,0.3878, +1.2907,0.3874, 1.2923,0.3869, 1.2938,0.3865, 1.2953,0.3860, 1.2968,0.3856, +1.2983,0.3851, 1.2998,0.3847, 1.3013,0.3842, 1.3028,0.3838, 1.3043,0.3834, +1.3058,0.3829, 1.3073,0.3825, 1.3088,0.3820, 1.3103,0.3816, 1.3118,0.3812, +1.3132,0.3807, 1.3147,0.3803, 1.3162,0.3799, 1.3177,0.3794, 1.3192,0.3790, +1.3207,0.3786, 1.3221,0.3782, 1.3236,0.3778, 1.3251,0.3773, 1.3266,0.3769, +1.3280,0.3765, 1.3295,0.3761, 1.3310,0.3757, 1.3324,0.3753, 
1.3339,0.3748, +1.3354,0.3744, 1.3368,0.3740, 1.3383,0.3736, 1.3397,0.3732, 1.3412,0.3728, +1.3427,0.3724, 1.3441,0.3720, 1.3456,0.3716, 1.3470,0.3712, 1.3485,0.3708, +1.3499,0.3704, 1.3514,0.3700, 1.3528,0.3696, 1.3542,0.3692, 1.3557,0.3688, +1.3571,0.3684, 1.3586,0.3680, 1.3600,0.3676, 1.3614,0.3673, 1.3629,0.3669, +1.3643,0.3665, 1.3657,0.3661, 1.3672,0.3657, 1.3686,0.3653, 1.3700,0.3650, +1.3714,0.3646, 1.3729,0.3642, 1.3743,0.3638, 1.3757,0.3634, 1.3771,0.3631, +1.3785,0.3627, 1.3800,0.3623, 1.3814,0.3620, 1.3828,0.3616, 1.3842,0.3612, +1.3856,0.3609, 1.3870,0.3605, 1.3884,0.3601, 1.3898,0.3598, 1.3912,0.3594, +1.3926,0.3590, 1.3940,0.3587, 1.3954,0.3583, 1.3968,0.3580, 1.3982,0.3576, +1.3996,0.3572, 1.4010,0.3569, 1.4024,0.3565, 1.4038,0.3562, 1.4052,0.3558, +1.4066,0.3555, 1.4080,0.3551, 1.4094,0.3548, 1.4108,0.3544, 1.4121,0.3541, +1.4135,0.3537 +}; + + +/* Generated by: */ +#if 0 +#include <math.h> +#include <stdio.h> +#include <assert.h> + +int +main(int argc, char **argv) +{ + int i, j; + + printf ("const float __t_sqrt[1024] = {"); + for (i = 0; i < 2; i++) + { + putchar('\n'); + for (j = 0; j < 256; j++) + { + double mval = j/512.0 + 0.5; + double eval = i==0 ? 1.0 : 2.0; + double ls = sqrt(mval*eval); + double hs = sqrt((mval+1/512.0)*eval); + double as = (ls+hs)*0.5; + double lx = 1/(2.0*ls); + double hx = 1/(2.0*hs); + double ax = (lx+hx)*0.5; + + printf("%.4f,%.4f%s",as,ax, + i*j==255 ? "\n" : j % 5 == 4 ? ",\n" : ", "); + assert((hs-ls)/as < 1/256.0); + assert((hx-lx)/ax < 1/256.0); + } + } + printf ("};\n"); + return 0; +} +#endif /* 0 */ diff --git a/REORG.TODO/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c b/REORG.TODO/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c new file mode 100644 index 0000000000..4e3f90d4d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c @@ -0,0 +1,370 @@ +/* Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ryan S. 
Arnold <rsa@us.ibm.com> + Sean Curry <spcurry@us.ibm.com> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> +#include <malloc.h> +#include <link.h> +#include <elf.h> +#include <fpu_control.h> +#include <sys/auxv.h> + +static ucontext_t ctx[3]; + + +volatile int global; + + +static int back_in_main; + + +volatile static ElfW(auxv_t) *auxv = NULL; + +ElfW(Addr) query_auxv(int type) +{ + FILE *auxv_f; + ElfW(auxv_t) auxv_struct; + ElfW(auxv_t) *auxv_temp; + int i = 0; + + /* if the /proc/self/auxv file has not been manually copied into the heap + yet, then do it */ + + if(auxv == NULL) + { + auxv_f = fopen("/proc/self/auxv", "r"); + + if(auxv_f == 0) + { + perror("Error opening file for reading"); + return 0; + } + auxv = (ElfW(auxv_t) *)malloc(getpagesize()); + + do + { + fread(&auxv_struct, sizeof(ElfW(auxv_t)), 1, auxv_f); + auxv[i] = auxv_struct; + i++; + } while(auxv_struct.a_type != AT_NULL); + } + + auxv_temp = (ElfW(auxv_t) *)auxv; + i = 0; + do + { + if(auxv_temp[i].a_type == type) + { + return auxv_temp[i].a_un.a_val; + } + i++; + } while (auxv_temp[i].a_type != AT_NULL); + + return 0; +} + +typedef unsigned int di_fpscr_t __attribute__ ((__mode__ (__DI__))); +typedef unsigned int si_fpscr_t __attribute__ ((__mode__ 
(__SI__))); + +#define _FPSCR_RESERVED 0xfffffff8ffffff04ULL + +#define _FPSCR_TEST0_DRN 0x0000000400000000ULL +#define _FPSCR_TEST0_RN 0x0000000000000003ULL + +#define _FPSCR_TEST1_DRN 0x0000000300000000ULL +#define _FPSCR_TEST1_RN 0x0000000000000002ULL + +/* Macros for accessing the hardware control word on Power6[x]. */ +#define _GET_DI_FPSCR(__fpscr) \ + ({union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + __asm__ ("mffs %0" : "=f" (fr)); \ + u.d = fr; \ + (__fpscr) = u.fpscr; \ + u.fpscr; \ + }) + +/* We make sure to zero fp after we use it in order to prevent stale data + in an fp register from making a test-case pass erroneously. */ +# define _SET_DI_FPSCR(__fpscr) \ + { union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + u.fpscr = __fpscr; \ + fr = u.d; \ + /* Set the entire 64-bit FPSCR. */ \ + __asm__ (".machine push; " \ + ".machine \"power6\"; " \ + "mtfsf 255,%0,1,0; " \ + ".machine pop" : : "f" (fr)); \ + fr = 0.0; \ + } + +# define _GET_SI_FPSCR(__fpscr) \ + ({union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + __asm__ ("mffs %0" : "=f" (fr)); \ + u.d = fr; \ + (__fpscr) = (si_fpscr_t) u.fpscr; \ + (si_fpscr_t) u.fpscr; \ + }) + +/* We make sure to zero fp after we use it in order to prevent stale data + in an fp register from making a test-case pass erroneously. */ +# define _SET_SI_FPSCR(__fpscr) \ + { union { double d; di_fpscr_t fpscr; } u; \ + register double fr; \ + /* More-or-less arbitrary; this is a QNaN. */ \ + u.fpscr = 0xfff80000ULL << 32; \ + u.fpscr |= __fpscr & 0xffffffffULL; \ + fr = u.d; \ + __asm__ ("mtfsf 255,%0" : : "f" (fr)); \ + fr = 0.0; \ + } + +void prime_special_regs(int which) +{ + ElfW(Addr) a_val; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (1); + } + + /* Indicates a 64-bit FPSCR. 
*/ + if (a_val & PPC_FEATURE_HAS_DFP) + { + _GET_DI_FPSCR(di_fpscr); + + /* Overwrite the existing DRN and RN if there is one. */ + if (which == 0) + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST0_DRN | _FPSCR_TEST0_RN)); + else + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST1_DRN | _FPSCR_TEST1_RN)); + puts ("Priming 64-bit FPSCR with:"); + printf("0x%.16llx\n",(unsigned long long int)di_fpscr); + + _SET_DI_FPSCR(di_fpscr); + } + else + { + puts ("32-bit FPSCR found and will be tested."); + _GET_SI_FPSCR(di_fpscr); + + /* Overwrite the existing RN if there is one. */ + if (which == 0) + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST0_RN)); + else + di_fpscr = ((di_fpscr & _FPSCR_RESERVED) | (_FPSCR_TEST1_RN)); + puts ("Priming 32-bit FPSCR with:"); + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + + _SET_SI_FPSCR(di_fpscr); + } +} + +void clear_special_regs(void) +{ + ElfW(Addr) a_val; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + union { + double d; + unsigned long long int lli; + unsigned int li[2]; + } dlli; + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (1); + } + +#if __WORDSIZE == 32 + dlli.d = ctx[0].uc_mcontext.uc_regs->fpregs.fpscr; +#else + dlli.d = ctx[0].uc_mcontext.fp_regs[32]; +#endif + + puts("The FPSCR value saved in the ucontext_t is:"); + + /* Indicates a 64-bit FPSCR. 
*/ + if (a_val & PPC_FEATURE_HAS_DFP) + { + printf("0x%.16llx\n",dlli.lli); + di_fpscr = 0x0; + puts ("Clearing the 64-bit FPSCR to:"); + printf("0x%.16llx\n",(unsigned long long int) di_fpscr); + + _SET_DI_FPSCR(di_fpscr); + } + else + { + printf("0x%.8x\n",(unsigned int) dlli.li[1]); + di_fpscr = 0x0; + puts ("Clearing the 32-bit FPSCR to:"); + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + + _SET_SI_FPSCR(di_fpscr); + } +} + +void test_special_regs(int which) +{ + ElfW(Addr) a_val; + unsigned long long int test; + + di_fpscr_t di_fpscr __attribute__ ((__aligned__(8))); + + a_val = query_auxv(AT_HWCAP); + if(a_val == -1) + { + puts ("querying the auxv for the hwcap failed"); + _exit (2); + } + + /* Indicates a 64-bit FPSCR. */ + if (a_val & PPC_FEATURE_HAS_DFP) + { + _GET_DI_FPSCR(di_fpscr); + + if (which == 0) + puts ("After setcontext the 64-bit FPSCR contains:"); + else + puts ("After swapcontext the 64-bit FPSCR contains:"); + + printf("0x%.16llx\n",(unsigned long long int) di_fpscr); + test = (_FPSCR_TEST0_DRN | _FPSCR_TEST0_RN); + if((di_fpscr & (test)) != (test)) + { + printf ("%s: DRN and RN bits set before getcontext were not preserved across [set|swap]context call: %m",__FUNCTION__); + _exit (3); + } + } + else + { + _GET_SI_FPSCR(di_fpscr); + if (which == 0) + puts ("After setcontext the 32-bit FPSCR contains:"); + else + puts ("After swapcontext the 32-bit FPSCR contains:"); + + printf("0x%.8lx\n",(unsigned long int) di_fpscr); + test = _FPSCR_TEST0_RN; + if((di_fpscr & test) != test) + { + printf ("%s: RN bit set before getcontext was not preserved across [set|swap]context call: %m",__FUNCTION__); + _exit (4); + } + } +} + + +static void +check_called (void) +{ + if (back_in_main == 0) + { + puts ("program did not reach main again"); + _exit (5); + } +} + + +int +main (void) +{ + atexit (check_called); + + puts ("priming the FPSCR with a marker"); + prime_special_regs (0); + + puts ("making contexts"); + if (getcontext (&ctx[0]) != 0) + { + if 
(errno == ENOSYS) + { + back_in_main = 1; + exit (0); + } + + printf ("%s: getcontext: %m\n", __FUNCTION__); + exit (6); + } + + /* Play some tricks with this context. */ + if (++global == 1) + { + clear_special_regs ( ); + if (setcontext (&ctx[0]) != 0) + { + printf ("%s: setcontext: %m\n", __FUNCTION__); + exit (7); + } + } + if (global != 2) + { + printf ("%s: 'global' not incremented twice\n", __FUNCTION__); + exit (8); + } + + test_special_regs (0); + + global = 0; + if (getcontext (&ctx[0]) != 0) + { + printf ("%s: getcontext: %m\n", __FUNCTION__); + exit (9); + } + + if (++global == 1) + { + puts ("priming the FPSCR with a marker"); + prime_special_regs (1); + + puts ("swapping contexts"); + if (swapcontext (&ctx[1], &ctx[0]) != 0) + { + printf ("%s: swapcontext: %m\n", __FUNCTION__); + exit (9); + } + } + if (global != 2) + { + printf ("%s: 'global' not incremented twice\n", __FUNCTION__); + exit (10); + } + + test_special_regs (1); + + puts ("back at main program"); + back_in_main = 1; + + puts ("test succeeded"); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/fpu_control.h b/REORG.TODO/sysdeps/powerpc/fpu_control.h new file mode 100644 index 0000000000..842cbfa03f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/fpu_control.h @@ -0,0 +1,120 @@ +/* FPU control word definitions. PowerPC version. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _FPU_CONTROL_H +#define _FPU_CONTROL_H + +#ifdef _SOFT_FLOAT + +# define _FPU_RESERVED 0xffffffff +# define _FPU_DEFAULT 0x00000000 /* Default value. */ +typedef unsigned int fpu_control_t; +# define _FPU_GETCW(cw) (cw) = 0 +# define _FPU_SETCW(cw) (void) (cw) +extern fpu_control_t __fpu_control; + +#elif defined __NO_FPRS__ /* e500 */ + +/* rounding control */ +# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */ +# define _FPU_RC_DOWN 0x03 +# define _FPU_RC_UP 0x02 +# define _FPU_RC_ZERO 0x01 + +/* masking of interrupts */ +# define _FPU_MASK_ZM 0x10 /* zero divide */ +# define _FPU_MASK_OM 0x04 /* overflow */ +# define _FPU_MASK_UM 0x08 /* underflow */ +# define _FPU_MASK_XM 0x40 /* inexact */ +# define _FPU_MASK_IM 0x20 /* invalid operation */ + +# define _FPU_RESERVED 0x00c10080 /* These bits are reserved and not changed. */ + +/* Correct IEEE semantics require traps to be enabled at the hardware + level; the kernel then does the emulation and determines whether + generation of signals from those traps was enabled using prctl. */ +# define _FPU_DEFAULT 0x0000003c /* Default value. */ +# define _FPU_IEEE _FPU_DEFAULT + +/* Type of the control word. */ +typedef unsigned int fpu_control_t; + +/* Macros for accessing the hardware control word. */ +# define _FPU_GETCW(cw) \ + __asm__ volatile ("mfspefscr %0" : "=r" (cw)) +# define _FPU_SETCW(cw) \ + __asm__ volatile ("mtspefscr %0" : : "r" (cw)) + +/* Default control word set at startup. */ +extern fpu_control_t __fpu_control; + +#else /* PowerPC 6xx floating-point. 
*/
+
+/* rounding control */
+# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */
+# define _FPU_RC_DOWN 0x03
+# define _FPU_RC_UP 0x02
+# define _FPU_RC_ZERO 0x01
+
+# define _FPU_MASK_NI 0x04 /* non-ieee mode */
+
+/* masking of interrupts */
+# define _FPU_MASK_ZM 0x10 /* zero divide */
+# define _FPU_MASK_OM 0x40 /* overflow */
+# define _FPU_MASK_UM 0x20 /* underflow */
+# define _FPU_MASK_XM 0x08 /* inexact */
+# define _FPU_MASK_IM 0x80 /* invalid operation */
+
+# define _FPU_RESERVED 0xffffff00 /* These bits are reserved and not changed. */
+
+/* The fdlibm code requires no interrupts for exceptions. */
+# define _FPU_DEFAULT 0x00000000 /* Default value. */
+
+/* IEEE: same as above, but (some) exceptions;
+ we leave the 'inexact' exception off.
+ */
+# define _FPU_IEEE 0x000000f0
+
+/* Type of the control word. */
+typedef unsigned int fpu_control_t;
+
+/* Macros for accessing the hardware control word. */
+# define _FPU_GETCW(cw) \
+ ({union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __asm__ ("mffs %0" : "=f" (__fr)); \
+ __u.__d = __fr; \
+ (cw) = (fpu_control_t) __u.__ll; \
+ (fpu_control_t) __u.__ll; \
+ })
+
+# define _FPU_SETCW(cw) \
+ { union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __u.__ll = 0xfff80000LL << 32; /* This is a QNaN. */ \
+ __u.__ll |= (cw) & 0xffffffffLL; \
+ __fr = __u.__d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (__fr)); \
+ }
+
+/* Default control word set at startup. */
+extern fpu_control_t __fpu_control;
+
+#endif /* PowerPC 6xx floating-point. */
+
+#endif /* _FPU_CONTROL_H */
diff --git a/REORG.TODO/sysdeps/powerpc/gccframe.h b/REORG.TODO/sysdeps/powerpc/gccframe.h
new file mode 100644
index 0000000000..87edbae05b
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/gccframe.h
@@ -0,0 +1,21 @@
+/* Definition of object in frame unwind info. powerpc version.
+ Copyright (C) 2001-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define DWARF_FRAME_REGISTERS 77 + +#include <sysdeps/generic/gccframe.h> diff --git a/REORG.TODO/sysdeps/powerpc/hwcapinfo.c b/REORG.TODO/sysdeps/powerpc/hwcapinfo.c new file mode 100644 index 0000000000..82ad222c36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/hwcapinfo.c @@ -0,0 +1,84 @@ +/* powerpc HWCAP/HWCAP2 and AT_PLATFORM data pre-processing. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/
+
+#include <unistd.h>
+#include <shlib-compat.h>
+#include <dl-procinfo.h>
+
+uint64_t __tcb_hwcap __attribute__ ((visibility ("hidden")));
+uint32_t __tcb_platform __attribute__ ((visibility ("hidden")));
+
+/* This function parses the HWCAP/HWCAP2 fields, adding the previous supported
+ ISA bits, as well as converting the AT_PLATFORM string to a number. This
+ data is stored in two global variables that can be used later by the
+ powerpc-specific code to store it into the TCB. */
+void
+__tcb_parse_hwcap_and_convert_at_platform (void)
+{
+
+ uint64_t h1, h2;
+
+ /* Read AT_PLATFORM string from auxv and convert it to a number. */
+ __tcb_platform = _dl_string_platform (GLRO (dl_platform));
+
+ /* Read HWCAP and HWCAP2 from auxv. */
+ h1 = GLRO (dl_hwcap);
+ h2 = GLRO (dl_hwcap2);
+
+ /* HWCAP advertises only the most recent supported ISA; check which one
+ is set and fill in the feature bits for all earlier ISAs it implies. */
+
+ if (h2 & PPC_FEATURE2_ARCH_2_07)
+ h1 |= PPC_FEATURE_ARCH_2_06
+ | PPC_FEATURE_ARCH_2_05
+ | PPC_FEATURE_POWER5_PLUS
+ | PPC_FEATURE_POWER5
+ | PPC_FEATURE_POWER4;
+ else if (h1 & PPC_FEATURE_ARCH_2_06)
+ h1 |= PPC_FEATURE_ARCH_2_05
+ | PPC_FEATURE_POWER5_PLUS
+ | PPC_FEATURE_POWER5
+ | PPC_FEATURE_POWER4;
+ else if (h1 & PPC_FEATURE_ARCH_2_05)
+ h1 |= PPC_FEATURE_POWER5_PLUS
+ | PPC_FEATURE_POWER5
+ | PPC_FEATURE_POWER4;
+ else if (h1 & PPC_FEATURE_POWER5_PLUS)
+ h1 |= PPC_FEATURE_POWER5
+ | PPC_FEATURE_POWER4;
+ else if (h1 & PPC_FEATURE_POWER5)
+ h1 |= PPC_FEATURE_POWER4;
+
+ /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that
+ we can read both in a single load later. */
+ __tcb_hwcap = h2;
+ __tcb_hwcap = (h1 << 32) | __tcb_hwcap;
+
+}
+#if IS_IN (rtld)
+versioned_symbol (ld, __tcb_parse_hwcap_and_convert_at_platform, \
+ __parse_hwcap_and_convert_at_platform, GLIBC_2_23);
+#endif
+
+/* Export __parse_hwcap_and_convert_at_platform in libc.a. 
This is used by + GCC to make sure that the HWCAP/Platform bits are stored in the TCB when + using __builtin_cpu_is()/__builtin_cpu_supports() in the static case. */ +#ifndef SHARED +weak_alias (__tcb_parse_hwcap_and_convert_at_platform, \ + __parse_hwcap_and_convert_at_platform); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/hwcapinfo.h b/REORG.TODO/sysdeps/powerpc/hwcapinfo.h new file mode 100644 index 0000000000..830948a855 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/hwcapinfo.h @@ -0,0 +1,29 @@ +/* powerpc HWCAP/HWCAP2 and AT_PLATFORM data pre-processing. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +#ifndef HWCAPINFO_H +# define HWCAPINFO_H + +extern uint64_t __tcb_hwcap attribute_hidden; +extern uint32_t __tcb_platform attribute_hidden; + +extern void __tcb_parse_hwcap_and_convert_at_platform (void); + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/ifunc-sel.h b/REORG.TODO/sysdeps/powerpc/ifunc-sel.h new file mode 100644 index 0000000000..bdb00bf2c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/ifunc-sel.h @@ -0,0 +1,51 @@ +/* Used by the elf ifunc tests. 
*/ +#ifndef ELF_IFUNC_SEL_H +#define ELF_IFUNC_SEL_H 1 + +extern int global; + +static inline void * +inhibit_stack_protector +ifunc_sel (int (*f1) (void), int (*f2) (void), int (*f3) (void)) +{ + register void *ret __asm__ ("r3"); + __asm__ ("mflr 12\n\t" + "bcl 20,31,1f\n" + "1:\tmflr 11\n\t" + "mtlr 12\n\t" + "addis 12,11,global-1b@ha\n\t" + "lwz 12,global-1b@l(12)\n\t" + "addis %0,11,%2-1b@ha\n\t" + "addi %0,%0,%2-1b@l\n\t" + "cmpwi 12,1\n\t" + "beq 2f\n\t" + "addis %0,11,%3-1b@ha\n\t" + "addi %0,%0,%3-1b@l\n\t" + "cmpwi 12,-1\n\t" + "beq 2f\n\t" + "addis %0,11,%4-1b@ha\n\t" + "addi %0,%0,%4-1b@l\n\t" + "2:" + : "=r" (ret) + : "i" (&global), "i" (f1), "i" (f2), "i" (f3) + : "11", "12", "cr0"); + return ret; +} + +static inline void * +inhibit_stack_protector +ifunc_one (int (*f1) (void)) +{ + register void *ret __asm__ ("r3"); + __asm__ ("mflr 12\n\t" + "bcl 20,31,1f\n" + "1:\tmflr %0\n\t" + "mtlr 12\n\t" + "addis %0,%0,%1-1b@ha\n\t" + "addi %0,%0,%1-1b@l" + : "=r" (ret) + : "i" (f1) + : "12"); + return ret; +} +#endif diff --git a/REORG.TODO/sysdeps/powerpc/jmpbuf-offsets.h b/REORG.TODO/sysdeps/powerpc/jmpbuf-offsets.h new file mode 100644 index 0000000000..e748a35f4d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/jmpbuf-offsets.h @@ -0,0 +1,36 @@ +/* Private macros for accessing __jmp_buf contents. PowerPC version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define JB_GPR1 0 /* Also known as the stack pointer */ +#define JB_GPR2 1 +#define JB_LR 2 /* The address we will return to */ +#if __WORDSIZE == 64 +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */ +# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */ +# define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */ +#else +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */ +# define JB_CR 21 /* Condition code registers. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE ((64 + (12 * 4)) * 4) +# define JB_VRSAVE 62 +# define JB_VRS 64 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/jmpbuf-unwind.h b/REORG.TODO/sysdeps/powerpc/jmpbuf-unwind.h new file mode 100644 index 0000000000..96e2af9e3f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/jmpbuf-unwind.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <setjmp.h> +#include <jmpbuf-offsets.h> +#include <stdint.h> +#include <unwind.h> +#include <sysdep.h> + +/* Test if longjmp to JMPBUF would unwind the frame + containing a local variable at ADDRESS. */ +#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \ + ((void *) (address) < (void *) demangle ((jmpbuf)[JB_GPR1])) + +#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ + _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj) + +static inline uintptr_t __attribute__ ((unused)) +_jmpbuf_sp (__jmp_buf regs) +{ + uintptr_t sp = regs[JB_GPR1]; +#ifdef PTR_DEMANGLE + PTR_DEMANGLE (sp); +#endif + return sp; +} + +#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ + ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj)) + +/* We use the normal longjmp for unwinding. */ +#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/REORG.TODO/sysdeps/powerpc/ldsodefs.h b/REORG.TODO/sysdeps/powerpc/ldsodefs.h new file mode 100644 index 0000000000..466de797fc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/ldsodefs.h @@ -0,0 +1,68 @@ +/* Run-time dynamic linker data structures for loaded ELF shared objects. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _POWERPC_LDSODEFS_H +#define _POWERPC_LDSODEFS_H 1 + +#include <elf.h> + +struct La_ppc32_regs; +struct La_ppc32_retval; +struct La_ppc64_regs; +struct La_ppc64_retval; +struct La_ppc64v2_regs; +struct La_ppc64v2_retval; + +#define ARCH_PLTENTER_MEMBERS \ + Elf32_Addr (*ppc32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_ppc32_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf64_Addr (*ppc64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *, \ + uintptr_t *, struct La_ppc64_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf64_Addr (*ppc64v2_gnu_pltenter) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_ppc64v2_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep) + +#define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*ppc32_gnu_pltexit) (Elf32_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_ppc32_regs *, \ + struct La_ppc32_retval *, \ + const char *); \ + unsigned int (*ppc64_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_ppc64_regs *, \ + struct La_ppc64_retval *, \ + const char *); \ + unsigned int (*ppc64v2_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, \ + uintptr_t *, \ + const struct La_ppc64v2_regs *,\ + struct La_ppc64v2_retval *, \ + const char *) + +#include_next <ldsodefs.h> + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/libc-tls.c b/REORG.TODO/sysdeps/powerpc/libc-tls.c new file mode 100644 index 0000000000..76d5c9abb9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/libc-tls.c @@ -0,0 +1,32 @@ +/* Thread-local storage handling in the ELF dynamic linker. PowerPC version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <csu/libc-tls.c>
+#include <dl-tls.h>
+
+/* On powerpc, the linker usually optimizes code sequences used to access
+ Thread Local Storage. However, when the user disables these optimizations
+ by passing --no-tls-optimize to the linker, we need to provide __tls_get_addr
+ in static libc in order to avoid undefined references to that symbol. 
*/ + +void * +__tls_get_addr (tls_index *ti) +{ + dtv_t *dtv = THREAD_DTV (); + return (char *) dtv[1].pointer.val + ti->ti_offset + TLS_DTV_OFFSET; +} diff --git a/REORG.TODO/sysdeps/powerpc/locale-defines.sym b/REORG.TODO/sysdeps/powerpc/locale-defines.sym new file mode 100644 index 0000000000..5c5379c39f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/locale-defines.sym @@ -0,0 +1,9 @@ +#include <locale/localeinfo.h> + +-- + +LOCALE_CTYPE_TOLOWER offsetof (struct __locale_struct, __ctype_tolower) +LOCALE_CTYPE_TOUPPER offsetof (struct __locale_struct, __ctype_toupper) +_NL_CTYPE_NONASCII_CASE +LOCALE_DATA_VALUES offsetof (struct __locale_data, values) +SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0]) diff --git a/REORG.TODO/sysdeps/powerpc/longjmp.c b/REORG.TODO/sysdeps/powerpc/longjmp.c new file mode 100644 index 0000000000..bd3ed8c22b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/longjmp.c @@ -0,0 +1,60 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Versioned copy of sysdeps/generic/longjmp.c modified for AltiVec support. 
*/ + +#include <shlib-compat.h> +#include <stddef.h> +#include <setjmp.h> +#include <signal.h> + +extern void __vmx__longjmp (__jmp_buf __env, int __val) + __attribute__ ((noreturn)); +extern void __vmx__libc_longjmp (sigjmp_buf env, int val) + __attribute__ ((noreturn)); +libc_hidden_proto (__vmx__libc_longjmp) + +/* Set the signal mask to the one specified in ENV, and jump + to the position specified in ENV, causing the setjmp + call there to return VAL, or 1 if VAL is 0. */ +void +__vmx__libc_siglongjmp (sigjmp_buf env, int val) +{ + /* Perform any cleanups needed by the frames being unwound. */ + _longjmp_unwind (env, val); + + if (env[0].__mask_was_saved) + /* Restore the saved signal mask. */ + (void) __sigprocmask (SIG_SETMASK, &env[0].__saved_mask, + (sigset_t *) NULL); + + /* Call the machine-dependent function to restore machine state. */ + __vmx__longjmp (env[0].__jmpbuf, val ?: 1); +} + +strong_alias (__vmx__libc_siglongjmp, __vmx__libc_longjmp) +libc_hidden_def (__vmx__libc_longjmp) +weak_alias (__vmx__libc_siglongjmp, __vmx_longjmp) +weak_alias (__vmx__libc_siglongjmp, __vmxlongjmp) +weak_alias (__vmx__libc_siglongjmp, __vmxsiglongjmp) + + +default_symbol_version (__vmx__libc_longjmp, __libc_longjmp, GLIBC_PRIVATE); +default_symbol_version (__vmx__libc_siglongjmp, __libc_siglongjmp, GLIBC_PRIVATE); +versioned_symbol (libc, __vmx_longjmp, _longjmp, GLIBC_2_3_4); +versioned_symbol (libc, __vmxlongjmp, longjmp, GLIBC_2_3_4); +versioned_symbol (libc, __vmxsiglongjmp, siglongjmp, GLIBC_2_3_4); diff --git a/REORG.TODO/sysdeps/powerpc/machine-gmon.h b/REORG.TODO/sysdeps/powerpc/machine-gmon.h new file mode 100644 index 0000000000..3078426a6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/machine-gmon.h @@ -0,0 +1,30 @@ +/* PowerPC-specific implementation of profiling support. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* We need a special version of the `mcount' function because it has + to preserve more registers than your usual function. */ + +void __mcount_internal (unsigned long frompc, unsigned long selfpc); + +#define _MCOUNT_DECL(frompc, selfpc) \ +void __mcount_internal (unsigned long frompc, unsigned long selfpc) + + +/* Define MCOUNT as empty since we have the implementation in another + file. */ +#define MCOUNT diff --git a/REORG.TODO/sysdeps/powerpc/math-tests.h b/REORG.TODO/sysdeps/powerpc/math-tests.h new file mode 100644 index 0000000000..f7ba200cfc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/math-tests.h @@ -0,0 +1,33 @@ +/* Configuration for math tests. PowerPC version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* On PowerPC, in versions of GCC up to at least 4.7.2, a type cast -- which is + a IEEE 754-2008 general-computational convertFormat operation (IEEE + 754-2008, 5.4.2) -- does not turn a sNaN into a qNaN (whilst raising an + INVALID exception), which is contrary to IEEE 754-2008 5.1 and 7.2. This + renders certain tests infeasible in this scenario. + <http://gcc.gnu.org/PR56828>. */ +#define SNAN_TESTS_TYPE_CAST 0 + +#ifndef __NO_FPRS__ +/* Setting exception flags in FPSCR results in enabled traps for those + exceptions being taken. */ +# define EXCEPTION_SET_FORCES_TRAP 1 +#endif + +#include_next <math-tests.h> diff --git a/REORG.TODO/sysdeps/powerpc/memusage.h b/REORG.TODO/sysdeps/powerpc/memusage.h new file mode 100644 index 0000000000..69f098006a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/memusage.h @@ -0,0 +1,20 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define GETSP() ({ register uintptr_t stack_ptr asm ("%r1"); stack_ptr; }) + +#include <sysdeps/generic/memusage.h> diff --git a/REORG.TODO/sysdeps/powerpc/mp_clz_tab.c b/REORG.TODO/sysdeps/powerpc/mp_clz_tab.c new file mode 100644 index 0000000000..4b5f29bfe5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/mp_clz_tab.c @@ -0,0 +1 @@ +/* __clz_tab not needed on powerpc. */ diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/Makefile b/REORG.TODO/sysdeps/powerpc/nofpu/Makefile new file mode 100644 index 0000000000..35517b63a1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/Makefile @@ -0,0 +1,32 @@ +# Makefile fragment for PowerPC with no FPU. + +ifeq ($(subdir),soft-fp) +sysdep_routines += $(gcc-single-routines) $(gcc-double-routines) \ + sim-full atomic-feholdexcept atomic-feclearexcept \ + atomic-feupdateenv flt-rounds +endif + +ifeq ($(subdir),math) +libm-support += fenv_const +CPPFLAGS += -I../soft-fp/ +# The follow CFLAGS are a work around for GCC Bugzilla Bug 29253 +# "expand_abs wrong default code for floating point" +# As this is not a regression, a fix is not likely to go into +# gcc-4.1.1 and may be too late for gcc-4.2. So we need these flags +# until the fix in a gcc release and glibc drops support for earlier +# versions of gcc. 
+CFLAGS-e_atan2l.c += -fno-builtin-fabsl +CFLAGS-e_hypotl.c += -fno-builtin-fabsl +CFLAGS-e_powl.c += -fno-builtin-fabsl +CFLAGS-s_ccoshl.c += -fno-builtin-fabsl +CFLAGS-s_csinhl.c += -fno-builtin-fabsl +CFLAGS-s_clogl.c += -fno-builtin-fabsl +CFLAGS-s_clog10l.c += -fno-builtin-fabsl +CFLAGS-s_csinl.c += -fno-builtin-fabsl +CFLAGS-s_csqrtl.c += -fno-builtin-fabsl +CFLAGS-w_acosl_compat.c += -fno-builtin-fabsl +CFLAGS-w_asinl_compat.c += -fno-builtin-fabsl +CFLAGS-w_atanhl_compat.c += -fno-builtin-fabsl +CFLAGS-w_j0l_compat.c += -fno-builtin-fabsl +CFLAGS-w_j1l_compat.c += -fno-builtin-fabsl +endif diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/Subdirs b/REORG.TODO/sysdeps/powerpc/nofpu/Subdirs new file mode 100644 index 0000000000..87eadf3024 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/Subdirs @@ -0,0 +1 @@ +soft-fp diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/Versions b/REORG.TODO/sysdeps/powerpc/nofpu/Versions new file mode 100644 index 0000000000..9f569bd1a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/Versions @@ -0,0 +1,29 @@ +libc { + GLIBC_2.3.2 { + __sim_exceptions; __sim_disabled_exceptions; __sim_round_mode; + __adddf3; __addsf3; __divdf3; __divsf3; __eqdf2; __eqsf2; + __extendsfdf2; __fixdfsi; __fixsfsi; + __fixunsdfsi; __fixunssfsi; + __floatsidf; __floatsisf; + __gedf2; __gesf2; __ledf2; __lesf2; __muldf3; __mulsf3; + __negdf2; __negsf2; __sqrtdf2; __sqrtsf2; __subdf3; + __subsf3; __truncdfsf2; + } + GLIBC_2.4 { + __floatundidf; __floatundisf; + __floatunsidf; __floatunsisf; + __unorddf2; __unordsf2; + __nedf2; __nesf2; + __gtdf2; __gtsf2; + __ltdf2; __ltsf2; + } + GLIBC_2.19 { + __atomic_feholdexcept; __atomic_feclearexcept; __atomic_feupdateenv; + __flt_rounds; + } + GLIBC_PRIVATE { + __sim_exceptions_thread; + __sim_disabled_exceptions_thread; + __sim_round_mode_thread; + } +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feclearexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feclearexcept.c new file mode 100644 index 
0000000000..6946e19337 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feclearexcept.c @@ -0,0 +1,28 @@ +/* Clear floating-point exceptions for atomic compound assignment. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +void +__atomic_feclearexcept (void) +{ + /* This function postdates the global variables being turned into + compat symbols, so no need to set them. */ + __sim_exceptions_thread = 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feholdexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feholdexcept.c new file mode 100644 index 0000000000..50cf1b96ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feholdexcept.c @@ -0,0 +1,38 @@ +/* Store current floating-point environment and clear exceptions for + atomic compound assignment. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +void +__atomic_feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + + u.l[0] = __sim_exceptions_thread; + /* The rounding mode is not changed by arithmetic, so no need to + save it. */ + u.l[1] = __sim_disabled_exceptions_thread; + *envp = u.fenv; + + /* This function postdates the global variables being turned into + compat symbols, so no need to set them. */ + __sim_exceptions_thread = 0; + __sim_disabled_exceptions_thread = FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feupdateenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feupdateenv.c new file mode 100644 index 0000000000..e74178ff12 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/atomic-feupdateenv.c @@ -0,0 +1,37 @@ +/* Install given floating-point environment and raise exceptions for + atomic compound assignment. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <signal.h> + +void +__atomic_feupdateenv (const fenv_t *envp) +{ + fenv_union_t u; + int saved_exceptions = __sim_exceptions_thread; + + /* This function postdates the global variables being turned into + compat symbols, so no need to set them. */ + u.fenv = *envp; + __sim_exceptions_thread |= u.l[0]; + __sim_disabled_exceptions_thread = u.l[1]; + if (saved_exceptions & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fclrexcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/fclrexcpt.c new file mode 100644 index 0000000000..1c8e578afc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fclrexcpt.c @@ -0,0 +1,38 @@ +/* Clear floating-point exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__feclearexcept (int x) +{ + __sim_exceptions_thread &= ~x; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fedisblxcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/fedisblxcpt.c new file mode 100644 index 0000000000..6514f45a73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fedisblxcpt.c @@ -0,0 +1,34 @@ +/* Disable exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <fenv.h> + +int +fedisableexcept (int x) +{ + int old_exceptions = ~__sim_disabled_exceptions_thread & FE_ALL_EXCEPT; + + __sim_disabled_exceptions_thread |= x; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + + return old_exceptions; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/feenablxcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/feenablxcpt.c new file mode 100644 index 0000000000..cd71e4e9cd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/feenablxcpt.c @@ -0,0 +1,33 @@ +/* Enable exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-supp.h" +#include <fenv.h> + +int +feenableexcept (int exceptions) +{ + int old_exceptions = ~__sim_disabled_exceptions_thread & FE_ALL_EXCEPT; + + __sim_disabled_exceptions_thread &= ~exceptions; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + + return old_exceptions; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetenv.c new file mode 100644 index 0000000000..079946c58e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetenv.c @@ -0,0 +1,45 @@ +/* Store current floating-point environment (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002, 2010. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fegetenv (fenv_t *envp) +{ + fenv_union_t u; + + u.l[0] = __sim_exceptions_thread; + u.l[0] |= __sim_round_mode_thread; + u.l[1] = __sim_disabled_exceptions_thread; + + *envp = u.fenv; + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fegetenv) +libm_hidden_ver (__fegetenv, fegetenv) +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetexcept.c new file mode 100644 index 0000000000..36f4f45b36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetexcept.c @@ -0,0 +1,27 @@ +/* Get floating-point exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fegetexcept (void) +{ + return (__sim_disabled_exceptions_thread ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetmode.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetmode.c new file mode 100644 index 0000000000..2346858eda --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetmode.c @@ -0,0 +1,33 @@ +/* Store current floating-point control modes. PowerPC soft-float version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fegetmode (femode_t *modep) +{ + fenv_union_t u; + + u.l[0] = __sim_round_mode_thread; + u.l[1] = __sim_disabled_exceptions_thread; + + *modep = u.fenv; + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fegetround.c b/REORG.TODO/sysdeps/powerpc/nofpu/fegetround.c new file mode 100644 index 0000000000..7d7dfbaeef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fegetround.c @@ -0,0 +1,30 @@ +/* Return current rounding mode (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fegetround (void) +{ + return __sim_round_mode_thread; +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/feholdexcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/feholdexcpt.c new file mode 100644 index 0000000000..b0dce0bed1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/feholdexcpt.c @@ -0,0 +1,45 @@ +/* Store current floating-point environment and clear exceptions + (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + + /* Get the current state. */ + __fegetenv (envp); + + u.fenv = *envp; + /* Clear everything except the rounding mode. */ + u.l[0] &= 0x3; + /* Disable exceptions */ + u.l[1] = FE_ALL_EXCEPT; + + /* Put the new state in effect. */ + __fesetenv (&u.fenv); + + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fenv_const.c b/REORG.TODO/sysdeps/powerpc/nofpu/fenv_const.c new file mode 100644 index 0000000000..451070bcdf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fenv_const.c @@ -0,0 +1,38 @@ +/* Constants for fenv_bits.h (soft float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +/* We want to specify the bit pattern of the __fe_*_env constants, so + pretend they're really `long long' instead of `double'. */ + +/* If the default argument is used we use this value. 
Disable all + signalling exceptions as default. */ +const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) = +0x000000003e000000ULL; + +/* The same representation is used for femode_t. */ +extern const unsigned long long __fe_dfl_mode + __attribute__ ((aligned (8), alias ("__fe_dfl_env"))); + +/* Floating-point environment where none of the exceptions are masked. */ +const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) = +0x0000000000000000ULL; + +/* Floating-point environment with the NI bit set. No difference for + soft float from the default environment. */ +strong_alias (__fe_dfl_env, __fe_nonieee_env) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetenv.c new file mode 100644 index 0000000000..f6ef1737bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetenv.c @@ -0,0 +1,47 @@ +/* Set floating point environment (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fesetenv (const fenv_t *envp) +{ + fenv_union_t u; + + u.fenv = *envp; + __sim_exceptions_thread = u.l[0] & FE_ALL_EXCEPT; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + __sim_round_mode_thread = u.l[0] & 0x3; + SIM_SET_GLOBAL (__sim_round_mode_global, __sim_round_mode_thread); + __sim_disabled_exceptions_thread = u.l[1]; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetexcept.c new file mode 100644 index 0000000000..c22ee2e952 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetexcept.c @@ -0,0 +1,28 @@ +/* Set given exception flags. PowerPC soft-float version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-supp.h" + +int +fesetexcept (int excepts) +{ + __sim_exceptions_thread |= (excepts & FE_ALL_EXCEPT); + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetmode.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetmode.c new file mode 100644 index 0000000000..85ce8b3ae1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetmode.c @@ -0,0 +1,34 @@ +/* Install given floating-point control modes. PowerPC soft-float version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fesetmode (const femode_t *modep) +{ + fenv_union_t u; + + u.fenv = *modep; + __sim_round_mode_thread = u.l[0]; + SIM_SET_GLOBAL (__sim_round_mode_global, __sim_round_mode_thread); + __sim_disabled_exceptions_thread = u.l[1]; + SIM_SET_GLOBAL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions_thread); + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fesetround.c b/REORG.TODO/sysdeps/powerpc/nofpu/fesetround.c new file mode 100644 index 0000000000..2ca60b5014 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fesetround.c @@ -0,0 +1,36 @@ +/* Set rounding mode (soft-float edition). 
+ Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fesetround (int round) +{ + if ((unsigned int) round > FE_DOWNWARD) + return 1; + + __sim_round_mode_thread = round; + SIM_SET_GLOBAL (__sim_round_mode_global, __sim_round_mode_thread); + + return 0; +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/feupdateenv.c b/REORG.TODO/sysdeps/powerpc/nofpu/feupdateenv.c new file mode 100644 index 0000000000..316b7a96f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/feupdateenv.c @@ -0,0 +1,53 @@ +/* Install given floating-point environment and raise exceptions + (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <signal.h> + +int +__feupdateenv (const fenv_t *envp) +{ + int saved_exceptions; + + /* Save currently set exceptions. */ + saved_exceptions = __sim_exceptions_thread; + + /* Set environment. */ + __fesetenv (envp); + + /* Raise old exceptions. */ + __sim_exceptions_thread |= saved_exceptions; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + if (saved_exceptions & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fgetexcptflg.c b/REORG.TODO/sysdeps/powerpc/nofpu/fgetexcptflg.c new file mode 100644 index 0000000000..4247be2352 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fgetexcptflg.c @@ -0,0 +1,37 @@ +/* Store current representation for exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + *flagp = (fexcept_t) __sim_exceptions_thread & excepts & FE_ALL_EXCEPT; + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/flt-rounds.c b/REORG.TODO/sysdeps/powerpc/nofpu/flt-rounds.c new file mode 100644 index 0000000000..744ba95d60 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/flt-rounds.c @@ -0,0 +1,40 @@ +/* Return current rounding mode as correct value for FLT_ROUNDS. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdlib.h> + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__flt_rounds (void) +{ + switch (__sim_round_mode_thread) + { + case FP_RND_ZERO: + return 0; + case FP_RND_NEAREST: + return 1; + case FP_RND_PINF: + return 2; + case FP_RND_MINF: + return 3; + default: + abort (); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fraiseexcpt.c b/REORG.TODO/sysdeps/powerpc/nofpu/fraiseexcpt.c new file mode 100644 index 0000000000..585c1c7d95 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fraiseexcpt.c @@ -0,0 +1,43 @@ +/* Raise given exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "soft-supp.h" +#include <signal.h> + +#undef feraiseexcept +int +__feraiseexcept (int x) +{ + __sim_exceptions_thread |= x; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + if (x & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feraiseexcept, __old_feraiseexcept) +compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1); +#endif + +libm_hidden_def (__feraiseexcept) +libm_hidden_ver (__feraiseexcept, feraiseexcept) +versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/fsetexcptflg.c b/REORG.TODO/sysdeps/powerpc/nofpu/fsetexcptflg.c new file mode 100644 index 0000000000..10b64285d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/fsetexcptflg.c @@ -0,0 +1,40 @@ +/* Set floating-point environment exception handling (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +__fesetexceptflag(const fexcept_t *flagp, int excepts) +{ + /* Ignore exceptions not listed in 'excepts'. 
*/ + __sim_exceptions_thread + = (__sim_exceptions_thread & ~excepts) | (*flagp & excepts); + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/ftestexcept.c b/REORG.TODO/sysdeps/powerpc/nofpu/ftestexcept.c new file mode 100644 index 0000000000..56c804e558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/ftestexcept.c @@ -0,0 +1,28 @@ +/* Test floating-point exceptions (soft-float edition). + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "soft-supp.h" + +int +fetestexcept (int x) +{ + return __sim_exceptions_thread & x; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/get-rounding-mode.h b/REORG.TODO/sysdeps/powerpc/nofpu/get-rounding-mode.h new file mode 100644 index 0000000000..4e6206e121 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/get-rounding-mode.h @@ -0,0 +1,35 @@ +/* Determine floating-point rounding mode within libc. PowerPC + soft-float version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _POWERPC_NOFPU_GET_ROUNDING_MODE_H +#define _POWERPC_NOFPU_GET_ROUNDING_MODE_H 1 + +#include <fenv.h> + +#include "soft-supp.h" + +/* Return the floating-point rounding mode. 
*/ + +static inline int +get_rounding_mode (void) +{ + return __sim_round_mode_thread; +} + +#endif /* get-rounding-mode.h */ diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps new file mode 100644 index 0000000000..8935f0d714 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps @@ -0,0 +1,2364 @@ +# Begin of automatic generation + +# Maximal error of functions: +Function: "acos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "acos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "acos_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "acosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "acosh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "acosh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "acosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 4 + +Function: "asin": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "asin_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "asinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: "asinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "asinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: "atan": +float: 1 +ifloat: 1 
+ildouble: 1 +ldouble: 1 + +Function: "atan2": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "atan2_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "atan2_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "atan2_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "atan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "atan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "atan_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: "atanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "atanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "cabs": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "cabs_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacos": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "cacos": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cacos_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "cacos_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cacos_towardzero": 
+double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "cacos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cacos_upward": +double: 5 +float: 7 +idouble: 5 +ifloat: 7 +ildouble: 13 +ldouble: 13 + +Function: Real part of "cacosh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cacosh": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cacosh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "cacosh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cacosh_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "cacosh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "cacosh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "carg": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "carg_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "carg_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "carg_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "casin": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "casin_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "casin_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 
+ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "casin_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "casin_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "casin_upward": +double: 5 +float: 7 +idouble: 5 +ifloat: 7 +ildouble: 13 +ldouble: 13 + +Function: Real part of "casinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "casinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "casinh_downward": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "casinh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "casinh_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: Real part of "casinh_upward": +double: 5 +float: 7 +idouble: 5 +ifloat: 7 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "casinh_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "catan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catan_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of 
"catan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "catan_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "catanh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Real part of "catanh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "catanh_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Real part of "catanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "catanh_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: Real part of "catanh_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "catanh_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "cbrt": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "cbrt_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 5 +ldouble: 5 + +Function: "cbrt_towardzero": +double: 3 +float: 1 +idouble: 3 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cbrt_upward": +double: 5 +float: 1 +idouble: 5 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccos": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccos_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_downward": +double: 2 
+float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccos_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccos_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "ccosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ccosh_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ccosh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "ccosh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ccosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ccosh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cexp": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "cexp": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cexp_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: Imaginary part of "cexp_downward": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 11 +ldouble: 11 + +Function: 
Imaginary part of "cexp_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: Real part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cexp_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "clog": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "clog": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "clog10_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "clog10_downward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog10_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "clog10_towardzero": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 8 +ldouble: 8 + +Function: Real part of "clog10_upward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "clog10_upward": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "clog_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: Real part of "clog_towardzero": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "clog_towardzero": +double: 1 +float: 3 +idouble: 1 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Real part of "clog_upward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 6 
+ldouble: 6 + +Function: Imaginary part of "clog_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "cos": +float: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "cos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "cos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "cos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "cosh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "cosh_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "cosh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: Real part of "cpow": +double: 2 +float: 4 +idouble: 2 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "cpow": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "cpow_downward": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "cpow_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_towardzero": +double: 4 +float: 8 +idouble: 4 +ifloat: 8 +ildouble: 8 +ldouble: 8 + +Function: Imaginary part of "cpow_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: Real part of "cpow_upward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "cpow_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csin": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "csin": +ildouble: 1 +ldouble: 1 + +Function: Real part of "csin_downward": +double: 2 +float: 3 +idouble: 2 
+ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csin_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csinh": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csinh": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "csinh_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "csinh_towardzero": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: Real part of "csinh_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "csinh_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Imaginary part of "csqrt": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: Real part of "csqrt_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "csqrt_downward": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_towardzero": +double: 4 
+float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Imaginary part of "csqrt_towardzero": +double: 4 +float: 3 +idouble: 4 +ifloat: 3 +ildouble: 5 +ldouble: 5 + +Function: Real part of "csqrt_upward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 12 +ldouble: 12 + +Function: Imaginary part of "csqrt_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: Real part of "ctan": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: Imaginary part of "ctan": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Real part of "ctan_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Imaginary part of "ctan_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Real part of "ctan_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: Imaginary part of "ctan_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Real part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 7 +ldouble: 7 + +Function: Imaginary part of "ctan_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Real part of "ctanh": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: Imaginary part of "ctanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: Real part of "ctanh_downward": +double: 4 +float: 1 +idouble: 4 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: Imaginary part of "ctanh_downward": +double: 6 +float: 5 +idouble: 6 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: Real part of "ctanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 13 +ldouble: 13 + +Function: Imaginary part of "ctanh_towardzero": +double: 5 +float: 3 +idouble: 5 +ifloat: 3 +ildouble: 10 +ldouble: 10 + 
+Function: Real part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: Imaginary part of "ctanh_upward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 10 +ldouble: 10 + +Function: "erf": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "erf_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erf_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "erfc": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "erfc_downward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 10 +ldouble: 10 + +Function: "erfc_towardzero": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 9 +ldouble: 9 + +Function: "erfc_upward": +double: 5 +float: 6 +idouble: 5 +ifloat: 6 +ildouble: 7 +ldouble: 7 + +Function: "exp": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "exp10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "exp10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "exp2": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp2_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp2_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "exp_downward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "exp_towardzero": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: 
"exp_upward": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "expm1_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "expm1_towardzero": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "expm1_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 6 +ldouble: 6 + +Function: "fma": +ildouble: 1 +ldouble: 1 + +Function: "fma_downward": +ildouble: 1 +ldouble: 1 + +Function: "fma_towardzero": +ildouble: 2 +ldouble: 2 + +Function: "fma_upward": +ildouble: 3 +ldouble: 3 + +Function: "fmod": +ildouble: 1 +ldouble: 1 + +Function: "fmod_downward": +ildouble: 1 +ldouble: 1 + +Function: "fmod_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "fmod_upward": +ildouble: 1 +ldouble: 1 + +Function: "gamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "gamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "gamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 16 +ldouble: 16 + +Function: "gamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "hypot": +double: 1 +idouble: 1 +ildouble: 1 +ldouble: 1 + +Function: "hypot_downward": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_towardzero": +double: 1 +idouble: 1 +ildouble: 2 +ldouble: 2 + +Function: "hypot_upward": +double: 1 +idouble: 1 +ildouble: 3 +ldouble: 3 + +Function: "j0": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "j0_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 11 +ldouble: 11 + +Function: "j0_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 8 +ldouble: 8 + +Function: "j0_upward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "j1": +double: 1 +float: 2 
+idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "j1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "j1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "j1_upward": +double: 3 +float: 5 +idouble: 3 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "jn": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "jn_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "jn_towardzero": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 6 +ldouble: 6 + +Function: "jn_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 5 +ldouble: 5 + +Function: "ldexp_downward": +ildouble: 1 +ldouble: 1 + +Function: "ldexp_upward": +ildouble: 1 +ldouble: 1 + +Function: "lgamma": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "lgamma_downward": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 15 +ldouble: 15 + +Function: "lgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 16 +ldouble: 16 + +Function: "lgamma_upward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 11 +ldouble: 11 + +Function: "log": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log10": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log10_downward": +double: 2 +float: 3 +idouble: 2 +ifloat: 3 +ildouble: 1 +ldouble: 1 + +Function: "log10_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log10_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log1p": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "log1p_downward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log1p_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: 
"log1p_upward": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log2": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "log2_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 2 +ldouble: 2 + +Function: "log2_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "log2_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "log_downward": +float: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "log_towardzero": +float: 2 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "log_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "nextafter_downward": +ildouble: 1 +ldouble: 1 + +Function: "nextafter_upward": +ildouble: 1 +ldouble: 1 + +Function: "pow": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow10": +double: 2 +idouble: 2 +ildouble: 1 +ldouble: 1 + +Function: "pow10_downward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_towardzero": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 9 +ldouble: 9 + +Function: "pow10_upward": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "pow_downward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "pow_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "scalb_downward": +ildouble: 1 +ldouble: 1 + +Function: "scalb_upward": +ildouble: 1 +ldouble: 1 + +Function: "scalbln_downward": +ildouble: 1 +ldouble: 1 + +Function: "scalbln_upward": +ildouble: 1 +ldouble: 1 + +Function: "scalbn_downward": +ildouble: 1 +ldouble: 1 + +Function: "scalbn_upward": +ildouble: 1 +ldouble: 1 + +Function: "sin": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sin_downward": 
+double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sin_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 4 +ldouble: 4 + +Function: "sin_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 5 +ldouble: 5 + +Function: "sincos": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "sincos_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "sincos_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 7 +ldouble: 7 + +Function: "sincos_upward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "sinh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "sinh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sinh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 6 +ldouble: 6 + +Function: "sinh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: "sqrt": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_downward": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_towardzero": +ildouble: 1 +ldouble: 1 + +Function: "sqrt_upward": +ildouble: 1 +ldouble: 1 + +Function: "tan": +float: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "tan_downward": +double: 1 +float: 2 +idouble: 1 +ifloat: 2 +ildouble: 3 +ldouble: 3 + +Function: "tan_towardzero": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 2 +ldouble: 2 + +Function: "tan_upward": +double: 1 +float: 1 +idouble: 1 +ifloat: 1 +ildouble: 3 +ldouble: 3 + +Function: "tanh": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 1 +ldouble: 1 + +Function: "tanh_downward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 4 +ldouble: 4 + +Function: "tanh_towardzero": +double: 2 +float: 2 +idouble: 2 +ifloat: 2 +ildouble: 4 +ldouble: 4 + +Function: "tanh_upward": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 6 +ldouble: 6 + +Function: 
"tgamma": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "tgamma_downward": +double: 5 +float: 5 +idouble: 5 +ifloat: 5 +ildouble: 4 +ldouble: 4 + +Function: "tgamma_towardzero": +double: 5 +float: 4 +idouble: 5 +ifloat: 4 +ildouble: 3 +ldouble: 3 + +Function: "tgamma_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 4 +ldouble: 4 + +Function: "y0": +double: 2 +float: 1 +idouble: 2 +ifloat: 1 +ildouble: 1 +ldouble: 1 + +Function: "y0_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "y0_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "y0_upward": +double: 3 +float: 4 +idouble: 3 +ifloat: 4 +ildouble: 7 +ldouble: 7 + +Function: "y1": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "y1_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "y1_towardzero": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 7 +ldouble: 7 + +Function: "y1_upward": +double: 7 +float: 2 +idouble: 7 +ifloat: 2 +ildouble: 9 +ldouble: 9 + +Function: "yn": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 2 +ldouble: 2 + +Function: "yn_downward": +double: 3 +float: 2 +idouble: 3 +ifloat: 2 +ildouble: 10 +ldouble: 10 + +Function: "yn_towardzero": +double: 3 +float: 3 +idouble: 3 +ifloat: 3 +ildouble: 8 +ldouble: 8 + +Function: "yn_upward": +double: 4 +float: 4 +idouble: 4 +ifloat: 4 +ildouble: 5 +ldouble: 5 + +# end of automatic generation diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps-name b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps-name new file mode 100644 index 0000000000..3ed2c6ec8a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/libm-test-ulps-name @@ -0,0 +1 @@ +PowerPC soft-float diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/sim-full.c b/REORG.TODO/sysdeps/powerpc/nofpu/sim-full.c new file mode 100644 index 0000000000..da48f3714e --- 
/dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/sim-full.c @@ -0,0 +1,57 @@ +/* Software floating-point exception handling emulation. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <signal.h> +#include "soft-fp.h" +#include "soft-supp.h" + +/* Thread-local to store sticky exceptions. */ +__thread int __sim_exceptions_thread __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_exceptions_thread); + +/* By default, no exceptions should trap. 
*/ +__thread int __sim_disabled_exceptions_thread = 0xffffffff; +libc_hidden_data_def (__sim_disabled_exceptions_thread); + +__thread int __sim_round_mode_thread __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_round_mode_thread); + +#if SIM_GLOBAL_COMPAT +int __sim_exceptions_global __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_exceptions_global); +SIM_COMPAT_SYMBOL (__sim_exceptions_global, __sim_exceptions); + +int __sim_disabled_exceptions_global = 0xffffffff; +libc_hidden_data_def (__sim_disabled_exceptions_global); +SIM_COMPAT_SYMBOL (__sim_disabled_exceptions_global, + __sim_disabled_exceptions); + +int __sim_round_mode_global __attribute__ ((nocommon)); +libc_hidden_data_def (__sim_round_mode_global); +SIM_COMPAT_SYMBOL (__sim_round_mode_global, __sim_round_mode); +#endif + +void +__simulate_exceptions (int x) +{ + __sim_exceptions_thread |= x; + SIM_SET_GLOBAL (__sim_exceptions_global, __sim_exceptions_thread); + if (x & ~__sim_disabled_exceptions_thread) + raise (SIGFPE); +} diff --git a/REORG.TODO/sysdeps/powerpc/nofpu/soft-supp.h b/REORG.TODO/sysdeps/powerpc/nofpu/soft-supp.h new file mode 100644 index 0000000000..f66d9573fa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nofpu/soft-supp.h @@ -0,0 +1,63 @@ +/* Internal support stuff for complete soft float. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#if defined __NO_FPRS__ && !defined _SOFT_FLOAT + +# include <fenv_libc.h> + +#else + +# include <fenv.h> + +typedef union +{ + fenv_t fenv; + unsigned int l[2]; +} fenv_union_t; + +#endif + +extern __thread int __sim_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_exceptions_thread, tls_model ("initial-exec")); +extern __thread int __sim_disabled_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_disabled_exceptions_thread, + tls_model ("initial-exec")); +extern __thread int __sim_round_mode_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_round_mode_thread, tls_model ("initial-exec")); + +/* These variables were formerly global, so there are compat symbols + for global versions as well. */ + +#include <shlib-compat.h> +#define SIM_GLOBAL_COMPAT SHLIB_COMPAT (libc, GLIBC_2_3_2, GLIBC_2_19) +#if SIM_GLOBAL_COMPAT +extern int __sim_exceptions_global; +libc_hidden_proto (__sim_exceptions_global); +extern int __sim_disabled_exceptions_global ; +libc_hidden_proto (__sim_disabled_exceptions_global); +extern int __sim_round_mode_global; +libc_hidden_proto (__sim_round_mode_global); +# define SIM_COMPAT_SYMBOL(GLOBAL_NAME, NAME) \ + compat_symbol (libc, GLOBAL_NAME, NAME, GLIBC_2_3_2) +# define SIM_SET_GLOBAL(GLOBAL_VAR, THREAD_VAR) ((GLOBAL_VAR) = (THREAD_VAR)) +#else +# define SIM_SET_GLOBAL(GLOBAL_VAR, THREAD_VAR) ((void) 0) +#endif + +extern void __simulate_exceptions (int x) attribute_hidden; diff --git a/REORG.TODO/sysdeps/powerpc/novmx-longjmp.c b/REORG.TODO/sysdeps/powerpc/novmx-longjmp.c new file mode 100644 index 0000000000..b0020b728a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/novmx-longjmp.c @@ -0,0 +1,56 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy of sysdeps/generic/longjmp.c modified for backward compatibility + with old non AltiVec/VMX longjmp. */ + +#include <bits/wordsize.h> +#include <shlib-compat.h> +#if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# include <stddef.h> +# include <novmxsetjmp.h> +# include <signal.h> + + +/* Set the signal mask to the one specified in ENV, and jump + to the position specified in ENV, causing the setjmp + call there to return VAL, or 1 if VAL is 0. */ +void +__novmx__libc_siglongjmp (__novmx__sigjmp_buf env, int val) +{ + /* Perform any cleanups needed by the frames being unwound. */ + _longjmp_unwind (env, val); + + if (env[0].__mask_was_saved) + /* Restore the saved signal mask. */ + (void) __sigprocmask (SIG_SETMASK, &env[0].__saved_mask, + (sigset_t *) NULL); + + /* Call the machine-dependent function to restore machine state. 
*/ + __novmx__longjmp (env[0].__jmpbuf, val ?: 1); +} + +strong_alias (__novmx__libc_siglongjmp, __novmx__libc_longjmp) +libc_hidden_def (__novmx__libc_longjmp) +weak_alias (__novmx__libc_siglongjmp, __novmx_longjmp) +weak_alias (__novmx__libc_siglongjmp, __novmxlongjmp) +weak_alias (__novmx__libc_siglongjmp, __novmxsiglongjmp) + +compat_symbol (libc, __novmx_longjmp, _longjmp, GLIBC_2_0); +compat_symbol (libc, __novmxlongjmp, longjmp, GLIBC_2_0); +compat_symbol (libc, __novmxsiglongjmp, siglongjmp, GLIBC_2_0); +#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */ diff --git a/REORG.TODO/sysdeps/powerpc/novmx-sigjmp.c b/REORG.TODO/sysdeps/powerpc/novmx-sigjmp.c new file mode 100644 index 0000000000..7d0ae59437 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/novmx-sigjmp.c @@ -0,0 +1,44 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Copy of sysdeps/generic/sigjmp.c modified for backward compatibility + with old non AltiVec/VMX setjmp. 
*/ + +#include <bits/wordsize.h> +#include <shlib-compat.h> +#if IS_IN (libc) && defined SHARED +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# include <stddef.h> +# include <novmxsetjmp.h> +# include <signal.h> + +/* This function is called by the `sigsetjmp' macro + before doing a `__setjmp' on ENV[0].__jmpbuf. + Always return zero. */ + +int +__novmx__sigjmp_save (__novmx__sigjmp_buf env, int savemask) +{ + env[0].__mask_was_saved = (savemask && + __sigprocmask (SIG_BLOCK, (sigset_t *) NULL, + &env[0].__saved_mask) == 0); + + return 0; +} + +# endif /* SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */ +#endif /* IS_IN (libc) && SHARED */ diff --git a/REORG.TODO/sysdeps/powerpc/novmxsetjmp.h b/REORG.TODO/sysdeps/powerpc/novmxsetjmp.h new file mode 100644 index 0000000000..aa76bf9d1e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/novmxsetjmp.h @@ -0,0 +1,132 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Copied from setjmp/setjmp.h, powerpc/bits/setjmp.h and modified + appropriately to keep backward compatible with setjmp without + AltiVec/VMX support. + + This file is not exported and the interfaces are private to libc. 
*/ + +#ifndef __NOVMX_SETJMP_H +#define __NOVMX_SETJMP_H 1 + +#include <bits/wordsize.h> + +/* The following definitions are needed by ASM implementations of the old + (novmx) __longjmp/__setjmp functions. */ + +# define JB_GPR1 0 /* Also known as the stack pointer */ +# define JB_GPR2 1 +# define JB_LR 2 /* The address we will return to */ +# if __WORDSIZE == 64 +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_CR 21 /* Condition code registers. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE (40 * 8) +# else +# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */ +# define JB_CR 21 /* Condition code registers. */ +# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */ +# define JB_SIZE (58 * 4) +# endif + +#ifndef _ASM +/* The following definitions are needed by the novmx* implementations of + setjmp/longjmp/sigsetjmp/etc that wrapper __setjmp/__longjmp. */ + +# if __WORDSIZE == 64 +typedef long int __jmp_buf[40]; +# else +typedef long int __jmp_buf[58]; +# endif + +# include <bits/types/__sigset_t.h> + +/* Calling environment, plus possibly a saved signal mask. */ +typedef struct __novmx__jmp_buf_tag + { + /* NOTE: The machine-dependent definitions of `__sigsetjmp' + assume that a `jmp_buf' begins with a `__jmp_buf' and that + `__mask_was_saved' follows it. Do not move these members + or add others before it. */ + __jmp_buf __jmpbuf; /* Calling environment. */ + int __mask_was_saved; /* Saved the signal mask? */ + __sigset_t __saved_mask; /* Saved signal mask. */ + } __novmx__jmp_buf[1]; + + +/* Store the calling environment in ENV, also saving the signal mask. + Return 0. */ +extern int __novmxsetjmp (__novmx__jmp_buf __env); + +/* Store the calling environment in ENV, also saving the + signal mask if SAVEMASK is nonzero. Return 0. + This is the internal name for `sigsetjmp'. 
*/ +extern int __novmx__sigsetjmp (struct __novmx__jmp_buf_tag __env[1], + int __savemask); + +/* Store the calling environment in ENV, not saving the signal mask. + Return 0. */ +extern int __novmx_setjmp (struct __novmx__jmp_buf_tag __env[1]); + +/* Jump to the environment saved in ENV, making the + `setjmp' call there return VAL, or 1 if VAL is 0. */ +extern void __novmxlongjmp (struct __novmx__jmp_buf_tag __env[1], int __val) + __attribute__ ((__noreturn__)); + +/* Same. Usually `_longjmp' is used with `_setjmp', which does not save + the signal mask. But it is how ENV was saved that determines whether + `longjmp' restores the mask; `_longjmp' is just an alias. */ +extern void __novmx_longjmp (struct __novmx__jmp_buf_tag __env[1], int __val) + __attribute__ ((__noreturn__)); + +/* Use the same type for `jmp_buf' and `sigjmp_buf'. + The `__mask_was_saved' flag determines whether + or not `longjmp' will restore the signal mask. */ +typedef struct __novmx__jmp_buf_tag __novmx__sigjmp_buf[1]; + +/* Jump to the environment saved in ENV, making the + sigsetjmp call there return VAL, or 1 if VAL is 0. + Restore the signal mask if that sigsetjmp call saved it. + This is just an alias `longjmp'. */ +extern void __novmxsiglongjmp (__novmx__sigjmp_buf __env, int __val) + __attribute__ ((__noreturn__)); + +/* Internal machine-dependent function to restore context sans signal mask. */ +extern void __novmx__longjmp (__jmp_buf __env, int __val) + __attribute__ ((__noreturn__)); + +/* Internal function to possibly save the current mask of blocked signals + in ENV, and always set the flag saying whether or not it was saved. + This is used by the machine-dependent definition of `__sigsetjmp'. + Always returns zero, for convenience. 
*/ +extern int __novmx__sigjmp_save (__novmx__jmp_buf __env, int __savemask); + +extern void _longjmp_unwind (__novmx__jmp_buf env, int val); + +extern void __novmx__libc_siglongjmp (__novmx__sigjmp_buf env, int val) + __attribute__ ((noreturn)); + +extern void __novmx__libc_longjmp (__novmx__sigjmp_buf env, int val) + __attribute__ ((noreturn)); + +libc_hidden_proto (__novmx__libc_longjmp) +libc_hidden_proto (__novmx_setjmp) +libc_hidden_proto (__novmx__sigsetjmp) +#endif /* !_ASM */ + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/Makefile b/REORG.TODO/sysdeps/powerpc/nptl/Makefile new file mode 100644 index 0000000000..13fa630f2f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/Makefile @@ -0,0 +1,20 @@ +# Copyright (C) 2003-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +ifeq ($(subdir),csu) +gen-as-const-headers += tcb-offsets.sym +endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/bits/pthreadtypes-arch.h b/REORG.TODO/sysdeps/powerpc/nptl/bits/pthreadtypes-arch.h new file mode 100644 index 0000000000..f29119b794 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/bits/pthreadtypes-arch.h @@ -0,0 +1,79 @@ +/* Machine-specific pthread type layouts. PowerPC version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _BITS_PTHREADTYPES_ARCH_H +#define _BITS_PTHREADTYPES_ARCH_H 1 + +#include <bits/wordsize.h> + +#if __WORDSIZE == 64 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +# define __SIZEOF_PTHREAD_ATTR_T 56 +# define __SIZEOF_PTHREAD_RWLOCK_T 56 +# define __SIZEOF_PTHREAD_BARRIER_T 32 +#else +# define __SIZEOF_PTHREAD_MUTEX_T 24 +# define __SIZEOF_PTHREAD_ATTR_T 36 +# define __SIZEOF_PTHREAD_RWLOCK_T 32 +# define __SIZEOF_PTHREAD_BARRIER_T 20 +#endif +#define __SIZEOF_PTHREAD_MUTEXATTR_T 4 +#define __SIZEOF_PTHREAD_COND_T 48 +#define __SIZEOF_PTHREAD_CONDATTR_T 4 +#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8 +#define __SIZEOF_PTHREAD_BARRIERATTR_T 4 + +/* Definitions for internal mutex struct. 
*/ +#define __PTHREAD_COMPAT_PADDING_MID +#define __PTHREAD_COMPAT_PADDING_END +#define __PTHREAD_MUTEX_LOCK_ELISION 1 + +#define __LOCK_ALIGNMENT +#define __ONCE_ALIGNMENT + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; +#if __WORDSIZE == 64 + int __cur_writer; + int __shared; + unsigned char __rwelision; + unsigned char __pad1[7]; + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned int __flags; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0, {0, 0, 0, 0, 0, 0, 0 } +#else + unsigned char __rwelision; + unsigned char __pad2; + unsigned char __shared; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. */ + unsigned char __flags; + int __cur_writer; +# define __PTHREAD_RWLOCK_ELISION_EXTRA 0 +#endif +}; + +#endif /* bits/pthreadtypes.h */ diff --git a/REORG.TODO/sysdeps/powerpc/nptl/elide.h b/REORG.TODO/sysdeps/powerpc/nptl/elide.h new file mode 100644 index 0000000000..1c42814b71 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/elide.h @@ -0,0 +1,125 @@ +/* elide.h: Generic lock elision support for powerpc. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef ELIDE_PPC_H +# define ELIDE_PPC_H + +#ifdef ENABLE_LOCK_ELISION +# include <htm.h> +# include <elision-conf.h> + +/* Get the new value of adapt_count according to the elision + configurations. Returns true if the system should retry again or false + otherwise. */ +static inline bool +__get_new_count (uint8_t *adapt_count, int attempt) +{ + /* A persistent failure indicates that a retry will probably + result in another failure. Use normal locking now and + for the next couple of calls. */ + if (_TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ())) + { + if (__elision_aconf.skip_lock_internal_abort > 0) + *adapt_count = __elision_aconf.skip_lock_internal_abort; + return false; + } + /* Same logic as above, but for a number of temporary failures in a + a row. */ + else if (attempt <= 1 && __elision_aconf.skip_lock_out_of_tbegin_retries > 0 + && __elision_aconf.try_tbegin > 0) + *adapt_count = __elision_aconf.skip_lock_out_of_tbegin_retries; + return true; +} + +/* CONCURRENCY NOTES: + + The evaluation of the macro expression is_lock_free encompasses one or + more loads from memory locations that are concurrently modified by other + threads. For lock elision to work, this evaluation and the rest of the + critical section protected by the lock must be atomic because an + execution with lock elision must be equivalent to an execution in which + the lock would have been actually acquired and released. Therefore, we + evaluate is_lock_free inside of the transaction that represents the + critical section for which we want to use lock elision, which ensures + the atomicity that we require. */ + +/* Returns 0 if the lock defined by is_lock_free was elided. + ADAPT_COUNT is a per-lock state variable. 
*/ +# define ELIDE_LOCK(adapt_count, is_lock_free) \ + ({ \ + int ret = 0; \ + if (adapt_count > 0) \ + (adapt_count)--; \ + else \ + for (int i = __elision_aconf.try_tbegin; i > 0; i--) \ + { \ + if (__libc_tbegin (0)) \ + { \ + if (is_lock_free) \ + { \ + ret = 1; \ + break; \ + } \ + __libc_tabort (_ABORT_LOCK_BUSY); \ + } \ + else \ + if (!__get_new_count (&adapt_count,i)) \ + break; \ + } \ + ret; \ + }) + +# define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) \ + ({ \ + int ret = 0; \ + if (__elision_aconf.try_tbegin > 0) \ + { \ + if (write) \ + __libc_tabort (_ABORT_NESTED_TRYLOCK); \ + ret = ELIDE_LOCK (adapt_count, is_lock_free); \ + } \ + ret; \ + }) + + +static inline bool +__elide_unlock (int is_lock_free) +{ + if (is_lock_free) + { + /* This code is expected to crash when trying to unlock a lock not + held by this thread. More information is available in the + __pthread_rwlock_unlock() implementation. */ + __libc_tend (0); + return true; + } + return false; +} + +# define ELIDE_UNLOCK(is_lock_free) \ + __elide_unlock (is_lock_free) + +# else + +# define ELIDE_LOCK(adapt_count, is_lock_free) 0 +# define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) 0 +# define ELIDE_UNLOCK(is_lock_free) 0 + +#endif /* ENABLE_LOCK_ELISION */ + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_lock.c b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_lock.c new file mode 100644 index 0000000000..e377feb1cf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_lock.c @@ -0,0 +1,43 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include "pthreadP.h"

/* Acquire the spin lock, blocking (spinning) until it becomes free.
   Always returns 0.

   Implementation: a classic PowerPC load-and-reserve / store-conditional
   (lwarx/stwcx.) loop.  If the lock word is observed non-zero, we fall
   out to a plain-load busy-wait loop (placed in a cold .subsection so the
   hot path stays straight-line) that avoids hammering the reservation,
   then retry the lwarx once the lock is seen free.  __ARCH_ACQ_INSTR
   provides the acquire barrier after a successful store.  */
int
pthread_spin_lock (pthread_spinlock_t *lock)
{
  unsigned int __tmp;

  asm volatile (
       "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n"
       " cmpwi 0,%0,0\n"
       " bne- 2f\n"
       " stwcx. %2,0,%1\n"
       " bne- 2f\n"
       __ARCH_ACQ_INSTR "\n"
       " .subsection 1\n"
       "2: lwzx %0,0,%1\n"
       " cmpwi 0,%0,0\n"
       " bne 2b\n"
       " b 1b\n"
       " .previous"
       : "=&r" (__tmp)
       : "r" (lock), "r" (1)
       : "cr0", "memory");
  return 0;
}
diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_trylock.c b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_trylock.c
new file mode 100644
index 0000000000..d81d984237
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_trylock.c
@@ -0,0 +1,41 @@
/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.
 */

#include <errno.h>
#include "pthreadP.h"

/* Try once to acquire the spin lock.  Returns 0 on success, EBUSY if the
   lock was already held.

   A single lwarx/stwcx. attempt: if the lock word is non-zero we branch
   straight out with ERR still EBUSY; if the store-conditional loses its
   reservation we retry the lwarx (that is contention on the reservation,
   not a held lock).  "li %1,0" only executes on success, after which
   __ARCH_ACQ_INSTR orders the critical section.  */
int
pthread_spin_trylock (pthread_spinlock_t *lock)
{
  unsigned int old;
  int err = EBUSY;

  asm ("1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n"
       " cmpwi 0,%0,0\n"
       " bne 2f\n"
       " stwcx. %3,0,%2\n"
       " bne- 1b\n"
       " li %1,0\n"
       __ARCH_ACQ_INSTR "\n"
       "2: "
       : "=&r" (old), "=&r" (err)
       : "r" (lock), "r" (1), "1" (err)
       : "cr0", "memory");

  return err;
}
diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_unlock.c b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_unlock.c
new file mode 100644
index 0000000000..fa30a82491
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/nptl/pthread_spin_unlock.c
@@ -0,0 +1,27 @@
/* pthread_spin_unlock -- unlock a spin lock.  PowerPC version.
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include "pthreadP.h"
#include <lowlevellock.h>

/* Release the spin lock.  A release store is sufficient: it orders all
   prior critical-section accesses before the lock word is cleared.
   Always returns 0.  */
int
pthread_spin_unlock (pthread_spinlock_t *lock)
{
  atomic_store_release (lock, 0);
  return 0;
}
diff --git a/REORG.TODO/sysdeps/powerpc/nptl/pthreaddef.h b/REORG.TODO/sysdeps/powerpc/nptl/pthreaddef.h
new file mode 100644
index 0000000000..e3e407a4c4
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/nptl/pthreaddef.h
@@ -0,0 +1,33 @@
/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Default stack size. */ +#define ARCH_STACK_DEFAULT_SIZE (4 * 1024 * 1024) + +/* Required stack pointer alignment at beginning. The ABI requires 16 + bytes (for both 32-bit and 64-bit PowerPC). */ +#define STACK_ALIGN 16 + +/* Minimal stack size after allocating thread descriptor and guard size. */ +#define MINIMAL_REST_STACK 4096 + +/* Alignment requirement for TCB. */ +#define TCB_ALIGNMENT 16 + + +/* Location of current stack frame. */ +#define CURRENT_STACK_FRAME __builtin_frame_address (0) diff --git a/REORG.TODO/sysdeps/powerpc/nptl/tcb-offsets.sym b/REORG.TODO/sysdeps/powerpc/nptl/tcb-offsets.sym new file mode 100644 index 0000000000..7c9fd33562 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/tcb-offsets.sym @@ -0,0 +1,32 @@ +#include <sysdep.h> +#include <tls.h> +#include <kernel-features.h> + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. 
+# undef __thread_register +# define __thread_register ((void *) 0) +# define thread_offsetof(mem) ((ptrdiff_t) THREAD_SELF + offsetof (struct pthread, mem)) + + +#if TLS_MULTIPLE_THREADS_IN_TCB +MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads) +#endif +TID thread_offsetof (tid) +POINTER_GUARD (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +TAR_SAVE (offsetof (tcbhead_t, tar_save) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +DSO_SLOT1 (offsetof (tcbhead_t, dso_slot1) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +DSO_SLOT2 (offsetof (tcbhead_t, dso_slot2) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +#ifdef __powerpc64__ +TCB_AT_PLATFORM (offsetof (tcbhead_t, at_platform) - TLS_TCB_OFFSET - sizeof(tcbhead_t)) +#endif +TM_CAPABLE (offsetof (tcbhead_t, tm_capable) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +#ifndef __powerpc64__ +TCB_AT_PLATFORM (offsetof (tcbhead_t, at_platform) - TLS_TCB_OFFSET - sizeof(tcbhead_t)) +PADDING (offsetof (tcbhead_t, padding) - TLS_TCB_OFFSET - sizeof(tcbhead_t)) +#endif +TCB_HWCAP (offsetof (tcbhead_t, hwcap) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) +#ifndef __ASSUME_PRIVATE_FUTEX +PRIVATE_FUTEX_OFFSET thread_offsetof (header.private_futex) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/nptl/tls.h b/REORG.TODO/sysdeps/powerpc/nptl/tls.h new file mode 100644 index 0000000000..7556e7c8b8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/nptl/tls.h @@ -0,0 +1,263 @@ +/* Definition for thread-local data handling. NPTL/PowerPC version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _TLS_H +#define _TLS_H 1 + +# include <dl-sysdep.h> + +#ifndef __ASSEMBLER__ +# include <stdbool.h> +# include <stddef.h> +# include <stdint.h> +# include <dl-dtv.h> + +#else /* __ASSEMBLER__ */ +# include <tcb-offsets.h> +#endif /* __ASSEMBLER__ */ + + +#ifndef __ASSEMBLER__ + +# include <hwcapinfo.h> + +/* Get system call information. */ +# include <sysdep.h> + +/* The TP points to the start of the thread blocks. */ +# define TLS_DTV_AT_TP 1 +# define TLS_TCB_AT_TP 0 + +/* We use the multiple_threads field in the pthread struct */ +#define TLS_MULTIPLE_THREADS_IN_TCB 1 + +/* Get the thread descriptor definition. */ +# include <nptl/descr.h> + + +/* The stack_guard is accessed directly by GCC -fstack-protector code, + so it is a part of public ABI. The dtv and pointer_guard fields + are private. */ +typedef struct +{ + /* Reservation for HWCAP data. To be accessed by GCC in + __builtin_cpu_supports(), so it is a part of public ABI. */ + uint64_t hwcap; + /* Reservation for AT_PLATFORM data. To be accessed by GCC in + __builtin_cpu_is(), so it is a part of public ABI. Since there + are different ABIs for 32 and 64 bit, we put this field in a + previously empty padding space for powerpc64. */ +#ifndef __powerpc64__ + /* Padding to maintain alignment. */ + uint32_t padding; + uint32_t at_platform; +#endif + /* Indicate if HTM capable (ISA 2.07). */ + uint32_t tm_capable; + /* Reservation for AT_PLATFORM data - powerpc64. */ +#ifdef __powerpc64__ + uint32_t at_platform; +#endif + /* Reservation for Dynamic System Optimizer ABI. 
*/ + uintptr_t dso_slot2; + uintptr_t dso_slot1; + /* Reservation for tar register (ISA 2.07). */ + uintptr_t tar_save; + /* GCC split stack support. */ + void *__private_ss; + /* Reservation for the Event-Based Branching ABI. */ + uintptr_t ebb_handler; + uintptr_t ebb_ctx_pointer; + uintptr_t ebb_reserved1; + uintptr_t ebb_reserved2; + uintptr_t pointer_guard; + uintptr_t stack_guard; + dtv_t *dtv; +} tcbhead_t; + +/* This is the size of the initial TCB. */ +# define TLS_INIT_TCB_SIZE 0 + +/* Alignment requirements for the initial TCB. */ +# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread) + +/* This is the size of the TCB. */ +# define TLS_TCB_SIZE 0 + +/* Alignment requirements for the TCB. */ +# define TLS_TCB_ALIGN __alignof__ (struct pthread) + +/* This is the size we need before TCB. */ +# define TLS_PRE_TCB_SIZE \ + (sizeof (struct pthread) \ + + ((sizeof (tcbhead_t) + TLS_TCB_ALIGN - 1) & ~(TLS_TCB_ALIGN - 1))) + +# ifndef __powerpc64__ +/* Register r2 (tp) is reserved by the ABI as "thread pointer". */ +register void *__thread_register __asm__ ("r2"); +# define PT_THREAD_POINTER PT_R2 +# else +/* Register r13 (tp) is reserved by the ABI as "thread pointer". */ +register void *__thread_register __asm__ ("r13"); +# define PT_THREAD_POINTER PT_R13 +# endif + +/* The following assumes that TP (R2 or R13) points to the end of the + TCB + 0x7000 (per the ABI). This implies that TCB address is + TP - 0x7000. As we define TLS_DTV_AT_TP we can + assume that the pthread struct is allocated immediately ahead of the + TCB. This implies that the pthread_descr address is + TP - (TLS_PRE_TCB_SIZE + 0x7000). */ +# define TLS_TCB_OFFSET 0x7000 + +/* Install the dtv pointer. The pointer passed is to the element with + index -1 which contain the length. */ +# define INSTALL_DTV(tcbp, dtvp) \ + ((tcbhead_t *) (tcbp))[-1].dtv = dtvp + 1 + +/* Install new dtv for current thread. 
*/ +# define INSTALL_NEW_DTV(dtv) (THREAD_DTV() = (dtv)) + +/* Return dtv of given thread descriptor. */ +# define GET_DTV(tcbp) (((tcbhead_t *) (tcbp))[-1].dtv) + +/* Code to initially initialize the thread pointer. This might need + special attention since 'errno' is not yet available and if the + operation can cause a failure 'errno' must not be touched. */ +# define TLS_INIT_TP(tcbp) \ + ({ \ + __thread_register = (void *) (tcbp) + TLS_TCB_OFFSET; \ + THREAD_SET_TM_CAPABLE (__tcb_hwcap & PPC_FEATURE2_HAS_HTM ? 1 : 0); \ + THREAD_SET_HWCAP (__tcb_hwcap); \ + THREAD_SET_AT_PLATFORM (__tcb_platform); \ + NULL; \ + }) + +/* Value passed to 'clone' for initialization of the thread register. */ +# define TLS_DEFINE_INIT_TP(tp, pd) \ + void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE; \ + (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].tm_capable) = \ + THREAD_GET_TM_CAPABLE (); \ + (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].hwcap) = \ + THREAD_GET_HWCAP (); \ + (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].at_platform) = \ + THREAD_GET_AT_PLATFORM (); + +/* Return the address of the dtv for the current thread. */ +# define THREAD_DTV() \ + (((tcbhead_t *) (__thread_register - TLS_TCB_OFFSET))[-1].dtv) + +/* Return the thread descriptor for the current thread. */ +# define THREAD_SELF \ + ((struct pthread *) (__thread_register \ + - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE)) + +/* Magic for libthread_db to know how to do THREAD_SELF. */ +# define DB_THREAD_SELF \ + REGISTER (32, 32, PT_THREAD_POINTER * 4, \ + - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE) \ + REGISTER (64, 64, PT_THREAD_POINTER * 8, \ + - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE) + +/* Read member of the thread descriptor directly. */ +# define THREAD_GETMEM(descr, member) ((void)(descr), (THREAD_SELF)->member) + +/* Same as THREAD_GETMEM, but the member offset can be non-constant. 
*/ +# define THREAD_GETMEM_NC(descr, member, idx) \ + ((void)(descr), (THREAD_SELF)->member[idx]) + +/* Set member of the thread descriptor directly. */ +# define THREAD_SETMEM(descr, member, value) \ + ((void)(descr), (THREAD_SELF)->member = (value)) + +/* Same as THREAD_SETMEM, but the member offset can be non-constant. */ +# define THREAD_SETMEM_NC(descr, member, idx, value) \ + ((void)(descr), (THREAD_SELF)->member[idx] = (value)) + +/* Set the stack guard field in TCB head. */ +# define THREAD_SET_STACK_GUARD(value) \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].stack_guard = (value)) +# define THREAD_COPY_STACK_GUARD(descr) \ + (((tcbhead_t *) ((char *) (descr) \ + + TLS_PRE_TCB_SIZE))[-1].stack_guard \ + = ((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].stack_guard) + +/* Set the stack guard field in TCB head. */ +# define THREAD_GET_POINTER_GUARD() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].pointer_guard) +# define THREAD_SET_POINTER_GUARD(value) \ + (THREAD_GET_POINTER_GUARD () = (value)) +# define THREAD_COPY_POINTER_GUARD(descr) \ + (((tcbhead_t *) ((char *) (descr) \ + + TLS_PRE_TCB_SIZE))[-1].pointer_guard \ + = THREAD_GET_POINTER_GUARD()) + +/* tm_capable field in TCB head. */ +# define THREAD_GET_TM_CAPABLE() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].tm_capable) +# define THREAD_SET_TM_CAPABLE(value) \ + (THREAD_GET_TM_CAPABLE () = (value)) + +/* hwcap field in TCB head. */ +# define THREAD_GET_HWCAP() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].hwcap) +# define THREAD_SET_HWCAP(value) \ + (THREAD_GET_HWCAP () = (value)) + +/* at_platform field in TCB head. 
*/ +# define THREAD_GET_AT_PLATFORM() \ + (((tcbhead_t *) ((char *) __thread_register \ + - TLS_TCB_OFFSET))[-1].at_platform) +# define THREAD_SET_AT_PLATFORM(value) \ + (THREAD_GET_AT_PLATFORM () = (value)) + +/* l_tls_offset == 0 is perfectly valid on PPC, so we have to use some + different value to mean unset l_tls_offset. */ +# define NO_TLS_OFFSET -1 + +/* Get and set the global scope generation counter in struct pthread. */ +#define THREAD_GSCOPE_FLAG_UNUSED 0 +#define THREAD_GSCOPE_FLAG_USED 1 +#define THREAD_GSCOPE_FLAG_WAIT 2 +#define THREAD_GSCOPE_RESET_FLAG() \ + do \ + { int __res \ + = atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \ + THREAD_GSCOPE_FLAG_UNUSED); \ + if (__res == THREAD_GSCOPE_FLAG_WAIT) \ + lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \ + } \ + while (0) +#define THREAD_GSCOPE_SET_FLAG() \ + do \ + { \ + THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \ + atomic_write_barrier (); \ + } \ + while (0) +#define THREAD_GSCOPE_WAIT() \ + GL(dl_wait_lookup_done) () + +#endif /* __ASSEMBLER__ */ + +#endif /* tls.h */ diff --git a/REORG.TODO/sysdeps/powerpc/power4/fpu/Makefile b/REORG.TODO/sysdeps/powerpc/power4/fpu/Makefile new file mode 100644 index 0000000000..e17d32f30e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/fpu/Makefile @@ -0,0 +1,7 @@ +# Makefile fragment for POWER4/5/5+ with FPU. + +ifeq ($(subdir),math) +CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops +CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1 +CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1 +endif diff --git a/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa-arch.h b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa-arch.h new file mode 100644 index 0000000000..4754c1b0f9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa-arch.h @@ -0,0 +1,56 @@ +/* Overridable constants and operations. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +typedef double mantissa_t; +typedef double mantissa_store_t; + +#define TWOPOW(i) (0x1.0p##i) + +#define RADIX TWOPOW (24) /* 2^24 */ +#define CUTTER TWOPOW (76) /* 2^76 */ +#define RADIXI 0x1.0p-24 /* 2^-24 */ +#define TWO52 TWOPOW (52) /* 2^52 */ + +/* Divide D by RADIX and put the remainder in R. */ +#define DIV_RADIX(d,r) \ + ({ \ + double u = ((d) + CUTTER) - CUTTER; \ + if (u > (d)) \ + u -= RADIX; \ + r = (d) - u; \ + (d) = u * RADIXI; \ + }) + +/* Put the integer component of a double X in R and retain the fraction in + X. */ +#define INTEGER_OF(x, r) \ + ({ \ + double u = ((x) + TWO52) - TWO52; \ + if (u > (x)) \ + u -= 1; \ + (r) = u; \ + (x) -= u; \ + }) + +/* Align IN down to a multiple of F, where F is a power of two. */ +#define ALIGN_DOWN_TO(in, f) \ + ({ \ + double factor = f * TWO52; \ + double u = (in + factor) - factor; \ + if (u > in) \ + u -= f; \ + u; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c new file mode 100644 index 0000000000..0a0f7175b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c @@ -0,0 +1,214 @@ + +/* + * IBM Accurate Mathematical Library + * written by International Business Machines Corp. + * Copyright (C) 2001-2017 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* Define __mul and __sqr and use the rest from generic code. */ +#define NO__MUL +#define NO__SQR + +#include <sysdeps/ieee754/dbl-64/mpa.c> + +/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X + and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P + digits. In case P > 3 the error is bounded by 1.001 ULP. */ +void +__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, i1, i2, j, k, k2; + long p2 = p; + double u, zk, zk2; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] * Y[0] == 0)) + { + Z[0] = 0; + return; + } + + /* Multiply, add and carry */ + k2 = (p2 < 3) ? p2 + p2 : p2 + 3; + zk = Z[k2] = 0; + for (k = k2; k > 1;) + { + if (k > p2) + { + i1 = k - p2; + i2 = p2 + 1; + } + else + { + i1 = 1; + i2 = k; + } +#if 1 + /* Rearrange this inner loop to allow the fmadd instructions to be + independent and execute in parallel on processors that have + dual symmetrical FP pipelines. */ + if (i1 < (i2 - 1)) + { + /* Make sure we have at least 2 iterations. */ + if (((i2 - i1) & 1L) == 1L) + { + /* Handle the odd iterations case. */ + zk2 = x->d[i2 - 1] * y->d[i1]; + } + else + zk2 = 0.0; + /* Do two multiply/adds per loop iteration, using independent + accumulators; zk and zk2. 
*/ + for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) + { + zk += x->d[i] * y->d[j]; + zk2 += x->d[i + 1] * y->d[j - 1]; + } + zk += zk2; /* Final sum. */ + } + else + { + /* Special case when iterations is 1. */ + zk += x->d[i1] * y->d[i1]; + } +#else + /* The original code. */ + for (i = i1, j = i2 - 1; i < i2; i++, j--) + zk += X[i] * Y[j]; +#endif + + u = (zk + CUTTER) - CUTTER; + if (u > zk) + u -= RADIX; + Z[k] = zk - u; + zk = u * RADIXI; + --k; + } + Z[k] = zk; + + int e = EX + EY; + /* Is there a carry beyond the most significant digit? */ + if (Z[1] == 0) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + e--; + } + + EZ = e; + Z[0] = X[0] * Y[0]; +} + +/* Square *X and store result in *Y. X and Y may not overlap. For P in + [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the + error is bounded by 1.001 ULP. This is a faster special case of + multiplication. */ +void +__sqr (const mp_no *x, mp_no *y, int p) +{ + long i, j, k, ip; + double u, yk; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] == 0)) + { + Y[0] = 0; + return; + } + + /* We need not iterate through all X's since it's pointless to + multiply zeroes. */ + for (ip = p; ip > 0; ip--) + if (X[ip] != 0) + break; + + k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; + + while (k > 2 * ip + 1) + Y[k--] = 0; + + yk = 0; + + while (k > p) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* In __mul, this loop (and the one within the next while loop) run + between a range to calculate the mantissa as follows: + + Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] + + X[n] * Y[k] + + For X == Y, we can get away with summing halfway and doubling the + result. For cases where the range size is even, the mid-point needs + to be added separately (above). 
*/ + for (i = k - p, j = p; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + + while (k > 1) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* Likewise for this loop. */ + for (i = 1, j = k - 1; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + Y[k] = yk; + + /* Squares are always positive. */ + Y[0] = 1.0; + + int e = EX * 2; + /* Is there a carry beyond the most significant digit? */ + if (__glibc_unlikely (Y[1] == 0)) + { + for (i = 1; i <= p; i++) + Y[i] = Y[i + 1]; + e--; + } + EY = e; +} diff --git a/REORG.TODO/sysdeps/powerpc/power4/wordcopy.c b/REORG.TODO/sysdeps/powerpc/power4/wordcopy.c new file mode 100644 index 0000000000..2648e25c58 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/wordcopy.c @@ -0,0 +1,212 @@ +/* _memcopy.c -- subroutines for memory copy functions. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! 
*/ + +#include <stddef.h> +#include <memcopy.h> + +/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ + +#ifndef WORDCOPY_FWD_ALIGNED +# define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned +#endif + +void +WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + do + { + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = a0; + ((op_t *) dstp)[1] = a1; + + srcp += 2 * OPSIZ; + dstp += 2 * OPSIZ; + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + DSTP should be aligned for memory operations on `op_t's, but SRCP must + *not* be aligned. */ + +#ifndef WORDCOPY_FWD_DEST_ALIGNED +# define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned +#endif + +void +WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. */ + + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make SRCP aligned by rounding it down to the beginning of the `op_t' + it points in the middle of. 
*/ + srcp &= -OPSIZ; + a0 = ((op_t *) srcp)[0]; + + if (len & 1) + { + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + + if (len == 1) + return; + + a0 = a1; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + do + { + a1 = ((op_t *) srcp)[1]; + a2 = ((op_t *) srcp)[2]; + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2); + a0 = a2; + + srcp += 2 * OPSIZ; + dstp += 2 * OPSIZ; + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_bwd_aligned -- Copy block finishing right before + SRCP to block finishing right before DSTP with LEN `op_t' words + (not LEN bytes!). Both SRCP and DSTP should be aligned for memory + operations on `op_t's. */ + +#ifndef WORDCOPY_BWD_ALIGNED +# define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned +#endif + +void +WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + len -= 1; + } + + do + { + srcp -= 2 * OPSIZ; + dstp -= 2 * OPSIZ; + + a1 = ((op_t *) srcp)[1]; + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[1] = a1; + ((op_t *) dstp)[0] = a0; + + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_bwd_dest_aligned -- Copy block finishing right + before SRCP to block finishing right before DSTP with LEN `op_t' + words (not LEN bytes!). DSTP should be aligned for memory + operations on `op_t', but SRCP must *not* be aligned. */ + +#ifndef WORDCOPY_BWD_DEST_ALIGNED +# define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned +#endif + +void +WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. 
*/ + + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make srcp aligned by rounding it down to the beginning of the op_t + it points in the middle of. */ + srcp &= -OPSIZ; + a2 = ((op_t *) srcp)[0]; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); + + if (len == 1) + return; + + a2 = a1; + len -= 1; + } + + do + { + srcp -= 2 * OPSIZ; + dstp -= 2 * OPSIZ; + + a1 = ((op_t *) srcp)[1]; + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[1] = MERGE (a1, sh_1, a2, sh_2); + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + a2 = a0; + + len -= 2; + } + while (len != 0); +} diff --git a/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modf.c b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modf.c new file mode 100644 index 0000000000..ee0c62874b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modf.c @@ -0,0 +1,58 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_private.h> +#include <math_ldbl_opt.h> + +double +__modf (double x, double *iptr) +{ + if (__builtin_isinf (x)) + { + *iptr = x; + return __copysign (0.0, x); + } + else if (__builtin_isnan (x)) + { + *iptr = NAN; + return NAN; + } + + if (x >= 0.0) + { + *iptr = __floor (x); + return __copysign (x - *iptr, x); + } + else + { + *iptr = __ceil (x); + return __copysign (x - *iptr, x); + } +} +weak_alias (__modf, modf) +#ifdef NO_LONG_DOUBLE +strong_alias (__modf, __modfl) +weak_alias (__modf, modfl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __modf, modfl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __modf, modfl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modff.c b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modff.c new file mode 100644 index 0000000000..35bed46fa7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power5+/fpu/s_modff.c @@ -0,0 +1,46 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_private.h> + +float +__modff (float x, float *iptr) +{ + if (__builtin_isinff (x)) + { + *iptr = x; + return __copysignf (0.0, x); + } + else if (__builtin_isnanf (x)) + { + *iptr = NAN; + return NAN; + } + + if (x >= 0.0) + { + *iptr = __floorf (x); + return __copysignf (x - *iptr, x); + } + else + { + *iptr = __ceilf (x); + return __copysignf (x - *iptr, x); + } +} +weak_alias (__modff, modff) diff --git a/REORG.TODO/sysdeps/powerpc/power6/wcschr.c b/REORG.TODO/sysdeps/powerpc/power6/wcschr.c new file mode 100644 index 0000000000..a416f31f7c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wcschr.c @@ -0,0 +1,96 @@ +/* wcschr.c - Wide Character Search for POWER6+. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#ifndef WCSCHR +# define WCSCHR __wcschr +# define DEFAULT_WCSCHR +#endif + +/* Find the first occurrence of WC in WCS. 
*/ +wchar_t * +WCSCHR (const wchar_t *wcs, const wchar_t wc) +{ + const wchar_t *wcs2 = wcs + 1; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + + do + { + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + if (*wcs == L'\0') + return NULL; + wcs += 2; + + if (*wcs2 == wc) + return (wchar_t *) wcs2; + if (*wcs2 == L'\0') + return NULL; + wcs2 += 2; + + if (*wcs == wc) + return (wchar_t *) wcs; + } + while (*wcs != L'\0'); + + return NULL; +} +#ifdef DEFAULT_WCSCHR +libc_hidden_def (__wcschr) +weak_alias (__wcschr, wcschr) +libc_hidden_weak (wcschr) +#else +libc_hidden_def (wcschr) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/power6/wcscpy.c b/REORG.TODO/sysdeps/powerpc/power6/wcscpy.c new file mode 100644 index 0000000000..e6de240746 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wcscpy.c @@ -0,0 +1,105 @@ +/* wcscpy.c - Wide Character Copy for POWER6+. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> +#include <wchar.h> + +#ifndef WCSCPY +# define WCSCPY wcscpy +#endif + +/* Copy SRC to DEST. */ +wchar_t * +WCSCPY (wchar_t *dest, const wchar_t *src) +{ + wint_t c,d; + wchar_t *wcp, *wcp2; + + if (__alignof__ (wchar_t) >= sizeof (wchar_t)) + { + const ptrdiff_t off = dest - src; + + wcp = (wchar_t *) src; + wcp2 = wcp + 1 ; + + do + { + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + + d = *wcp; + wcp[off] = d; + if (d == L'\0') + return dest; + wcp += 2; + + c = *wcp2; + wcp2[off] = c; + if (c == L'\0') + return dest; + wcp2 += 2; + } + while (c != L'\0'); + + } + else + { + wcp = dest; + + do + { + c = *src++; + *wcp++ = c; + } + while (c != L'\0'); + } + return dest; +} diff --git a/REORG.TODO/sysdeps/powerpc/power6/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/power6/wcsrchr.c new file mode 100644 index 0000000000..f9fb399b31 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wcsrchr.c @@ -0,0 +1,89 @@ +/* wcsrchr.c - Wide Character Reverse Search for POWER6+. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#ifndef WCSRCHR +# define WCSRCHR wcsrchr +#endif + +/* Find the last occurrence of WC in WCS. */ +wchar_t * +WCSRCHR (const wchar_t *wcs, const wchar_t wc) +{ + const wchar_t *wcs2 = wcs + 1; + const wchar_t *retval = NULL; + + if (*wcs == wc) + retval = wcs; + + if (*wcs == L'\0') return (wchar_t *) retval; + + do + { + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + if (*wcs == L'\0') + return (wchar_t *) retval; + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + if (*wcs == L'\0') + return (wchar_t *) retval; + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + if (*wcs == L'\0') + return (wchar_t *) retval; + wcs+=2; + + if (*wcs2 == wc) + retval = wcs2; + if (*wcs2 == L'\0') + return (wchar_t *) retval; + wcs2+=2; + + if (*wcs == wc) + retval = wcs; + } + while (*wcs != L'\0'); + + return (wchar_t *) retval; +} diff --git a/REORG.TODO/sysdeps/powerpc/power6/wordcopy.c b/REORG.TODO/sysdeps/powerpc/power6/wordcopy.c new file mode 100644 index 
0000000000..545a67bf5d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power6/wordcopy.c @@ -0,0 +1,221 @@ +/* _memcopy.c -- subroutines for memory copy functions. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ + +#include <stddef.h> +#include <memcopy.h> + +/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + Both SRCP and DSTP should be aligned for memory operations on `op_t's. 
*/ + +#ifndef WORDCOPY_FWD_ALIGNED +# define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned +#endif + +void +WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + do + { + a0 = ((op_t *) srcp)[0]; + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = a0; + ((op_t *) dstp)[1] = a1; + + srcp += 2 * OPSIZ; + dstp += 2 * OPSIZ; + len -= 2; + } + while (len != 0); +} + +/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to + block beginning at DSTP with LEN `op_t' words (not LEN bytes!). + DSTP should be aligned for memory operations on `op_t's, but SRCP must + *not* be aligned. */ + +#define fwd_align_merge(align) \ + do \ + { \ + a1 = ((op_t *) srcp)[1]; \ + a2 = ((op_t *) srcp)[2]; \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (__WORDSIZE-align*8)); \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (__WORDSIZE-align*8)); \ + a0 = a2; \ + srcp += 2 * OPSIZ; \ + dstp += 2 * OPSIZ; \ + len -= 2; \ + } \ + while (len != 0) + +#ifndef WORDCOPY_FWD_DEST_ALIGNED +# define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned +#endif + +void +WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + int align; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. */ + + align = srcp % OPSIZ; + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make SRCP aligned by rounding it down to the beginning of the `op_t' + it points in the middle of. 
*/ + srcp &= -OPSIZ; + a0 = ((op_t *) srcp)[0]; + + if (len & 1) + { + a1 = ((op_t *) srcp)[1]; + ((op_t *) dstp)[0] = MERGE (a0, sh_1, a1, sh_2); + + if (len == 1) + return; + + a0 = a1; + srcp += OPSIZ; + dstp += OPSIZ; + len -= 1; + } + + fwd_align_merge (align); + +} + +/* _wordcopy_bwd_aligned -- Copy block finishing right before + SRCP to block finishing right before DSTP with LEN `op_t' words + (not LEN bytes!). Both SRCP and DSTP should be aligned for memory + operations on `op_t's. */ + +#ifndef WORDCOPY_BWD_ALIGNED +# define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned +#endif + +void +WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + ((op_t *) dstp)[0] = ((op_t *) srcp)[0]; + + if (len == 1) + return; + len -= 1; + } + + do + { + srcp -= 2 * OPSIZ; + dstp -= 2 * OPSIZ; + + a1 = ((op_t *) srcp)[1]; + a0 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[1] = a1; + ((op_t *) dstp)[0] = a0; + + len -= 2; + } + while (len != 0); +} + +#define bwd_align_merge(align) \ + do \ + { \ + srcp -= 2 * OPSIZ; \ + dstp -= 2 * OPSIZ; \ + a1 = ((op_t *) srcp)[1]; \ + a0 = ((op_t *) srcp)[0]; \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (__WORDSIZE-align*8)); \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (__WORDSIZE-align*8)); \ + a2 = a0; \ + len -= 2; \ + } \ + while (len != 0) + +/* _wordcopy_bwd_dest_aligned -- Copy block finishing right + before SRCP to block finishing right before DSTP with LEN `op_t' + words (not LEN bytes!). DSTP should be aligned for memory + operations on `op_t', but SRCP must *not* be aligned. */ + +#ifndef WORDCOPY_BWD_DEST_ALIGNED +# define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned +#endif + +void +WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) +{ + op_t a0, a1, a2; + int sh_1, sh_2; + int align; + + /* Calculate how to shift a word read at the memory operation + aligned srcp to make it aligned for copy. 
*/ + + align = srcp % OPSIZ; + sh_1 = 8 * (srcp % OPSIZ); + sh_2 = 8 * OPSIZ - sh_1; + + /* Make srcp aligned by rounding it down to the beginning of the op_t + it points in the middle of. */ + srcp &= -OPSIZ; + a2 = ((op_t *) srcp)[0]; + + if (len & 1) + { + srcp -= OPSIZ; + dstp -= OPSIZ; + a1 = ((op_t *) srcp)[0]; + ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); + + if (len == 1) + return; + + a2 = a1; + len -= 1; + } + + bwd_align_merge (align); +} diff --git a/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logb.c b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logb.c new file mode 100644 index 0000000000..af74b1d024 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logb.c @@ -0,0 +1,79 @@ +/* logb(). PowerPC/POWER7 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math_ldbl_opt.h> + +/* This implementation avoids FP to INT conversions by using VSX + bitwise instructions over FP values. */ + +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 */ +static const double two10m1 = -1023.0; /* 2**10 -1 */ + +/* FP mask to extract the exponent. 
*/ +static const union { + unsigned long long mask; + double d; +} mask = { 0x7ff0000000000000ULL }; + +double +__logb (double x) +{ + double ret; + + if (__builtin_expect (x == 0.0, 0)) + /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */ + return -1.0 / __builtin_fabs (x); + + /* ret = x & 0x7ff0000000000000; */ + asm ( + "xxland %x0,%x1,%x2\n" + "fcfid %0,%0" + : "=f" (ret) + : "f" (x), "f" (mask.d)); + /* ret = (ret >> 52) - 1023.0; */ + ret = (ret * two1div52) + two10m1; + if (__builtin_expect (ret > -two10m1, 0)) + /* Multiplication is used to set logb (+-INF) = INF. */ + return (x * x); + else if (__builtin_expect (ret == two10m1, 0)) + { + /* POSIX specifies that denormal numbers are treated as + though they were normalized. */ + int32_t lx, ix; + int ma; + + EXTRACT_WORDS (ix, lx, x); + ix &= 0x7fffffff; + if (ix == 0) + ma = __builtin_clz (lx) + 32; + else + ma = __builtin_clz (ix); + return (double) (-1023 - (ma - 12)); + } + /* Test to avoid logb_downward (0.0) == -0.0. */ + return ret == -0.0 ? 0.0 : ret; +} +weak_alias (__logb, logb) +#ifdef NO_LONG_DOUBLE +strong_alias (__logb, __logbl) +weak_alias (__logb, logbl) +#endif + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, logb, logbl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbf.c b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbf.c new file mode 100644 index 0000000000..1461327cd1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbf.c @@ -0,0 +1,60 @@ +/* logbf(). PowerPC/POWER7 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "math_private.h" + +/* This implementation avoids FP to INT conversions by using VSX + bitwise instructions over FP values. */ + +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 */ +static const double two10m1 = -1023.0; /* -2**10 + 1 */ +static const double two7m1 = -127.0; /* -2**7 + 1 */ + +/* FP mask to extract the exponent. */ +static const union { + unsigned long long mask; + double d; +} mask = { 0x7ff0000000000000ULL }; + +float +__logbf (float x) +{ + /* VSX operation are all done internally as double. */ + double ret; + + if (__builtin_expect (x == 0.0, 0)) + /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */ + return -1.0 / __builtin_fabsf (x); + + /* ret = x & 0x7f800000; */ + asm ( + "xxland %x0,%x1,%x2\n" + "fcfid %0,%0" + : "=f"(ret) + : "f" (x), "f" (mask.d)); + /* ret = (ret >> 52) - 1023.0, since ret is double. */ + ret = (ret * two1div52) + two10m1; + if (__builtin_expect (ret > -two7m1, 0)) + /* Multiplication is used to set logb (+-INF) = INF. */ + return (x * x); + /* Since operations are done with double we don't need + additional tests for subnormal numbers. + The test is to avoid logb_downward (0.0) == -0.0. */ + return ret == -0.0 ? 0.0 : ret; +} +weak_alias (__logbf, logbf) diff --git a/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbl.c b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbl.c new file mode 100644 index 0000000000..3ae383a831 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power7/fpu/s_logbl.c @@ -0,0 +1,83 @@ +/* logbl(). PowerPC/POWER7 version. 
+ Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <math_ldbl_opt.h> + +/* This implementation avoids FP to INT conversions by using VSX + bitwise instructions over FP values. */ + +static const double two1div52 = 2.220446049250313e-16; /* 1/2**52 */ +static const double two10m1 = -1023.0; /* 2**10 -1 */ + +/* FP mask to extract the exponent. */ +static const union { + unsigned long long mask; + double d; +} mask = { 0x7ff0000000000000ULL }; + +long double +__logbl (long double x) +{ + double xh, xl; + double ret; + int64_t hx; + + if (__builtin_expect (x == 0.0L, 0)) + /* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */ + return -1.0L / __builtin_fabsl (x); + + ldbl_unpack (x, &xh, &xl); + EXTRACT_WORDS64 (hx, xh); + /* ret = x & 0x7ff0000000000000; */ + asm ( + "xxland %x0,%x1,%x2\n" + "fcfid %0,%0" + : "=f" (ret) + : "f" (xh), "f" (mask.d)); + /* ret = (ret >> 52) - 1023.0; */ + ret = (ret * two1div52) + two10m1; + if (__builtin_expect (ret > -two10m1, 0)) + /* Multiplication is used to set logb (+-INF) = INF. */ + return (xh * xh); + else if (__builtin_expect (ret == two10m1, 0)) + { + /* POSIX specifies that denormal number is treated as + though it were normalized. 
*/ + return (long double) (- (__builtin_clzll (hx & 0x7fffffffffffffffLL) \ + - 12) - 1023); + } + else if ((hx & 0x000fffffffffffffLL) == 0) + { + /* If the high part is a power of 2, and the low part is nonzero + with the opposite sign, the low part affects the + exponent. */ + int64_t lx, rhx; + EXTRACT_WORDS64 (lx, xl); + rhx = (hx & 0x7ff0000000000000LL) >> 52; + if ((hx ^ lx) < 0 && (lx & 0x7fffffffffffffffLL) != 0) + rhx--; + return (long double) (rhx - 1023); + } + /* Test to avoid logb_downward (0.0) == -0.0. */ + return ret == -0.0 ? 0.0 : ret; +} +#ifndef __logbl +long_double_symbol (libm, __logbl, logbl); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcmp.S new file mode 100644 index 0000000000..d1865140eb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcmp.S @@ -0,0 +1,128 @@ +/* Optimized memcmp implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memcmp + + r3:source1 address, return equality + r4:source2 address + r5:byte count + + Check 2 words from src1 and src2. If unequal jump to end and + return src1 > src2 or src1 < src2. 
+ If count = zero check bytes before zero counter and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If src1 = src2 and no null, repeat. */ + +EALIGN (memcmp, 5, 0) + srwi. r6,r5,5 + beq L(preword2_count_loop) + mtctr r6 + clrlwi r5,r5,27 + +L(word8_compare_loop): + lwz r10,0(r3) + lwz r6,4(r3) + lwz r8,0(r4) + lwz r9,4(r4) + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + lwz r10,8(r3) + lwz r6,12(r3) + lwz r8,8(r4) + lwz r9,12(r4) + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + lwz r10,16(r3) + lwz r6,20(r3) + lwz r8,16(r4) + lwz r9,20(r4) + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + lwz r10,24(r3) + lwz r6,28(r3) + addi r3,r3,0x20 + lwz r8,24(r4) + lwz r9,28(r4) + addi r4,r4,0x20 + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + bdnz L(word8_compare_loop) + +L(preword2_count_loop): + srwi. r6,r5,3 + beq L(prebyte_count_loop) + mtctr r6 + clrlwi r5,r5,29 + +L(word2_count_loop): + lwz r10,0(r3) + lwz r6,4(r3) + addi r3,r3,0x08 + lwz r8,0(r4) + lwz r9,4(r4) + addi r4,r4,0x08 + cmplw cr5,r8,r10 + cmplw cr1,r9,r6 + bne cr5,L(st2) + bne cr1,L(st1) + bdnz L(word2_count_loop) + +L(prebyte_count_loop): + addi r5,r5,1 + mtctr r5 + bdz L(end_memcmp) + +L(byte_count_loop): + lbz r6,0(r3) + addi r3,r3,0x01 + lbz r8,0(r4) + addi r4,r4,0x01 + cmplw cr5,r8,r6 + bne cr5,L(st2) + bdnz L(byte_count_loop) + +L(end_memcmp): + addi r3,r0,0 + blr + +L(l_r): + addi r3,r0,1 + blr + +L(st1): + blt cr1,L(l_r) + addi r3,r0,-1 + blr + +L(st2): + blt cr5,L(l_r) + addi r3,r0,-1 + blr +END (memcmp) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp,bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcpy.S new file mode 100644 index 0000000000..9878dbceac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memcpy.S @@ -0,0 +1,130 @@ +/* Optimized memcpy implementation for PowerPC476. 
+ Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memcpy + + r0:return address + r3:destination address + r4:source address + r5:byte count + + Save return address in r0. + If destination and source are unaligned and copy count is greater than 256 + then copy 0-3 bytes to make destination aligned. + If 32 or more bytes to copy we use 32 byte copy loop. + Finally we copy 0-31 extra bytes. */ + +EALIGN (memcpy, 5, 0) +/* Check if bytes to copy are greater than 256 and if + source and destination are unaligned */ + cmpwi r5,0x0100 + addi r0,r3,0 + ble L(string_count_loop) + neg r6,r3 + clrlwi. r6,r6,30 + beq L(string_count_loop) + neg r6,r4 + clrlwi. r6,r6,30 + beq L(string_count_loop) + mtctr r6 + subf r5,r6,r5 + +L(unaligned_bytecopy_loop): /* Align destination by copying 0-3 bytes */ + lbz r8,0x0(r4) + addi r4,r4,1 + stb r8,0x0(r3) + addi r3,r3,1 + bdnz L(unaligned_bytecopy_loop) + srwi.
r7,r5,5 + beq L(preword2_count_loop) + mtctr r7 + +L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */ + lwz r6,0(r4) + lwz r7,4(r4) + lwz r8,8(r4) + lwz r9,12(r4) + subi r5,r5,0x20 + stw r6,0(r3) + stw r7,4(r3) + stw r8,8(r3) + stw r9,12(r3) + lwz r6,16(r4) + lwz r7,20(r4) + lwz r8,24(r4) + lwz r9,28(r4) + addi r4,r4,0x20 + stw r6,16(r3) + stw r7,20(r3) + stw r8,24(r3) + stw r9,28(r3) + addi r3,r3,0x20 + bdnz L(word8_count_loop_no_dcbt) + +L(preword2_count_loop): /* Copy remaining 0-31 bytes */ + clrlwi. r12,r5,27 + beq L(end_memcpy) + mtxer r12 + lswx r5,0,r4 + stswx r5,0,r3 + mr r3,r0 + blr + +L(string_count_loop): /* Copy odd 0-31 bytes */ + clrlwi. r12,r5,28 + add r3,r3,r5 + add r4,r4,r5 + beq L(pre_string_copy) + mtxer r12 + subf r4,r12,r4 + subf r3,r12,r3 + lswx r6,0,r4 + stswx r6,0,r3 + +L(pre_string_copy): /* Check how many 32 byte chunks to copy */ + srwi. r7,r5,4 + beq L(end_memcpy) + mtctr r7 + +L(word4_count_loop_no_dcbt): /* Copy 32 bytes at a time */ + lwz r6,-4(r4) + lwz r7,-8(r4) + lwz r8,-12(r4) + lwzu r9,-16(r4) + stw r6,-4(r3) + stw r7,-8(r3) + stw r8,-12(r3) + stwu r9,-16(r3) + bdz L(end_memcpy) + lwz r6,-4(r4) + lwz r7,-8(r4) + lwz r8,-12(r4) + lwzu r9,-16(r4) + stw r6,-4(r3) + stw r7,-8(r3) + stw r8,-12(r3) + stwu r9,-16(r3) + bdnz L(word4_count_loop_no_dcbt) + +L(end_memcpy): + mr r3,r0 + blr +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memset.S new file mode 100644 index 0000000000..18aea515ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/memset.S @@ -0,0 +1,152 @@ +/* Optimized memset for PowerPC405,440,464 (32-byte cacheline). + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memset + + r3:destination address and return address + r4:source integer to copy + r5:byte count + r11:source integer to copy in all 32 bits of reg + r12:temp return address + + Save return address in r12. + If destination is unaligned and count is greater than 255 bytes + set 0-3 bytes to make destination aligned. + If count is greater than 255 bytes and setting zero to memory + use dcbz to set memory when we can, + otherwise do the following. + If 16 or more words to set we use 16 word copy loop. + Finally we set 0-15 extra bytes with string store. */ + +EALIGN (memset, 5, 0) + rlwinm r11,r4,0,24,31 + rlwimi r11,r4,8,16,23 + rlwimi r11,r11,16,0,15 + addi r12,r3,0 + cmpwi r5,0x00FF + ble L(preword8_count_loop) + cmpwi r4,0x00 + beq L(use_dcbz) + neg r6,r3 + clrlwi. r6,r6,30 + beq L(preword8_count_loop) + addi r8,0,1 + mtctr r6 + subi r3,r3,1 + +L(unaligned_bytecopy_loop): + stbu r11,0x1(r3) + subf. r5,r8,r5 + beq L(end_memset) + bdnz L(unaligned_bytecopy_loop) + addi r3,r3,1 + +L(preword8_count_loop): + srwi.
r6,r5,4 + beq L(preword2_count_loop) + mtctr r6 + addi r3,r3,-4 + mr r8,r11 + mr r9,r11 + mr r10,r11 + +L(word8_count_loop_no_dcbt): + stwu r8,4(r3) + stwu r9,4(r3) + subi r5,r5,0x10 + stwu r10,4(r3) + stwu r11,4(r3) + bdnz L(word8_count_loop_no_dcbt) + addi r3,r3,4 + +L(preword2_count_loop): + clrlwi. r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + +L(end_memset): + addi r3,r12,0 + blr + +L(use_dcbz): + neg r6,r3 + clrlwi. r7,r6,28 + beq L(skip_string_loop) + mr r8,r11 + mr r9,r11 + mr r10,r11 + subf r5,r7,r5 + mtxer r7 + stswx r8,0,r3 + add r3,r3,r7 + +L(skip_string_loop): + clrlwi r8,r6,27 + srwi. r8,r8,4 + beq L(dcbz_pre_loop) + mtctr r8 + +L(word_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(word_loop) + +L(dcbz_pre_loop): + srwi r6,r5,5 + mtctr r6 + addi r7,0,0 + +L(dcbz_loop): + dcbz r3,r7 + addi r3,r3,0x20 + subi r5,r5,0x20 + bdnz L(dcbz_loop) + srwi. r6,r5,4 + beq L(postword2_count_loop) + mtctr r6 + +L(postword8_count_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(postword8_count_loop) + +L(postword2_count_loop): + clrlwi. r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + b L(end_memset) +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcmp.S new file mode 100644 index 0000000000..42e04e9552 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcmp.S @@ -0,0 +1,134 @@ +/* Optimized strcmp implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strcmp + + Register Use + r0:temp return equality + r3:source1 address, return equality + r4:source2 address + + Implementation description + Check 2 words from src1 and src2. If unequal jump to end and + return src1 > src2 or src1 < src2. + If null check bytes before null and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If src1 = src2 and no null, repeat. */ + +EALIGN (strcmp,5,0) + neg r7,r3 + clrlwi r7,r7,20 + neg r8,r4 + clrlwi r8,r8,20 + srwi. r7,r7,5 + beq L(byte_loop) + srwi. r8,r8,5 + beq L(byte_loop) + cmplw r7,r8 + mtctr r7 + ble L(big_loop) + mtctr r8 + +L(big_loop): + lwz r5,0(r3) + lwz r6,4(r3) + lwz r8,0(r4) + lwz r9,4(r4) + dlmzb. r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + lwz r5,8(r3) + lwz r6,12(r3) + lwz r8,8(r4) + lwz r9,12(r4) + dlmzb. r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + lwz r5,16(r3) + lwz r6,20(r3) + lwz r8,16(r4) + lwz r9,20(r4) + dlmzb. r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + lwz r5,24(r3) + lwz r6,28(r3) + addi r3,r3,0x20 + lwz r8,24(r4) + lwz r9,28(r4) + addi r4,r4,0x20 + dlmzb. 
r12,r5,r6 + bne L(end_check) + cmplw r5,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + bdnz L(big_loop) + b L(byte_loop) + +L(end_check): + subfic r12,r12,4 + blt L(end_check2) + rlwinm r12,r12,3,0,31 + srw r5,r5,r12 + srw r8,r8,r12 + cmplw r5,r8 + bne L(st1) + b L(end_strcmp) + +L(end_check2): + addi r12,r12,4 + cmplw r5,r8 + rlwinm r12,r12,3,0,31 + bne L(st1) + srw r6,r6,r12 + srw r9,r9,r12 + cmplw r6,r9 + bne L(st1) + +L(end_strcmp): + addi r3,r0,0 + blr + +L(st1): + mfcr r3 + blr + +L(byte_loop): + lbz r5,0(r3) + addi r3,r3,1 + lbz r6,0(r4) + addi r4,r4,1 + cmplw r5,r6 + bne L(st1) + cmpwi r5,0 + beq L(end_strcmp) + b L(byte_loop) +END (strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcpy.S new file mode 100644 index 0000000000..2a554dc32e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strcpy.S @@ -0,0 +1,107 @@ +/* Optimized strcpy implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strcpy + + Register Use + r3:destination and return address + r4:source address + r10:temp destination address + + Implementation description + Loop by checking 2 words at a time, with dlmzb. 
Check if there is a null + in the 2 words. If there is a null jump to end checking to determine + where in the last 8 bytes it is. Copy the appropriate bytes of the last + 8 according to the null position. */ + +EALIGN (strcpy, 5, 0) + neg r7,r4 + subi r4,r4,1 + clrlwi. r8,r7,29 + subi r10,r3,1 + beq L(pre_word8_loop) + mtctr r8 + +L(loop): + lbzu r5,0x01(r4) + cmpi cr5,r5,0x0 + stbu r5,0x01(r10) + beq cr5,L(end_strcpy) + bdnz L(loop) + +L(pre_word8_loop): + subi r4,r4,3 + subi r10,r10,3 + +L(word8_loop): + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + lwzu r5,0x04(r4) + lwzu r6,0x04(r4) + dlmzb. r11,r5,r6 + bne L(byte_copy) + stwu r5,0x04(r10) + stwu r6,0x04(r10) + b L(word8_loop) + +L(last_bytes_copy): + stwu r5,0x04(r10) + subi r11,r11,4 + mtctr r11 + addi r10,r10,3 + subi r4,r4,1 + +L(last_bytes_copy_loop): + lbzu r5,0x01(r4) + stbu r5,0x01(r10) + bdnz L(last_bytes_copy_loop) + blr + +L(byte_copy): + blt L(last_bytes_copy) + mtctr r11 + addi r10,r10,3 + subi r4,r4,5 + +L(last_bytes_copy_loop2): + lbzu r5,0x01(r4) + stbu r5,0x01(r10) + bdnz L(last_bytes_copy_loop2) + +L(end_strcpy): + blr +END (strcpy) +libc_hidden_builtin_def (strcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strlen.S new file mode 100644 index 0000000000..25e54dafc7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strlen.S @@ -0,0 +1,75 @@ +/* Optimized strlen implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strlen + + Register Use + r3:source address and return length of string + r4:byte counter + + Implementation description + Load 2 words at a time and count bytes, if we find null we subtract one from + the count and return the count value. We need to subtract one because + we don't count the null character as a byte. */ + +EALIGN (strlen,5,0) + neg r7,r3 + clrlwi. r8,r7,29 + addi r4,0,0 + beq L(byte_count_loop) + mtctr r8 + +L(loop): + lbz r5,0(r3) + cmpi cr5,r5,0x0 + addi r3,r3,0x1 + addi r4,r4,0x1 + beq cr5,L(end_strlen) + bdnz L(loop) + +L(byte_count_loop): + lwz r5,0(r3) + lwz r6,4(r3) + dlmzb. r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + lwz r5,8(r3) + lwz r6,12(r3) + dlmzb. r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + lwz r5,16(r3) + lwz r6,20(r3) + dlmzb. r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + lwz r5,24(r3) + lwz r6,28(r3) + addi r3,r3,0x20 + dlmzb. 
r12,r5,r6 + add r4,r4,r12 + bne L(end_strlen) + b L(byte_count_loop) + +L(end_strlen): + addi r3,r4,-1 + blr +END (strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/405/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strncmp.S new file mode 100644 index 0000000000..9c5a8feb9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/405/strncmp.S @@ -0,0 +1,128 @@ +/* Optimized strncmp implementation for PowerPC476. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* strncmp + + Register Use + r0:temp return equality + r3:source1 address, return equality + r4:source2 address + r5:byte count + + Implementation description + Touch in 3 lines of D-cache. + If source1 or source2 is unaligned copy 0-3 bytes to make source1 aligned + Check 2 words from src1 and src2. If unequal jump to end and + return src1 > src2 or src1 < src2. + If null check bytes before null and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If count = zero check bytes before zero counter and then jump to end and + return src1 > src2, src1 < src2 or src1 = src2. + If src1 = src2 and no null, repeat. 
*/ + +EALIGN (strncmp,5,0) + neg r7,r3 + clrlwi r7,r7,20 + neg r8,r4 + clrlwi r8,r8,20 + srwi. r7,r7,3 + beq L(prebyte_count_loop) + srwi. r8,r8,3 + beq L(prebyte_count_loop) + cmplw r7,r8 + mtctr r7 + ble L(preword2_count_loop) + mtctr r8 + +L(preword2_count_loop): + srwi. r6,r5,3 + beq L(prebyte_count_loop) + mfctr r7 + cmplw r6,r7 + bgt L(set_count_loop) + mtctr r6 + clrlwi r5,r5,29 + +L(word2_count_loop): + lwz r10,0(r3) + lwz r6,4(r3) + addi r3,r3,0x08 + lwz r8,0(r4) + lwz r9,4(r4) + addi r4,r4,0x08 + dlmzb. r12,r10,r6 + bne L(end_check) + cmplw r10,r8 + bne L(st1) + cmplw r6,r9 + bne L(st1) + bdnz L(word2_count_loop) + +L(prebyte_count_loop): + addi r5,r5,1 + mtctr r5 + bdz L(end_strncmp) + +L(byte_count_loop): + lbz r6,0(r3) + addi r3,r3,1 + lbz r7,0(r4) + addi r4,r4,1 + cmplw r6,r7 + bne L(st1) + cmpwi r6,0 + beq L(end_strncmp) + bdnz L(byte_count_loop) + b L(end_strncmp) + +L(set_count_loop): + slwi r7,r7,3 + subf r5,r7,r5 + b L(word2_count_loop) + +L(end_check): + subfic r12,r12,4 + blt L(end_check2) + rlwinm r12,r12,3,0,31 + srw r10,r10,r12 + srw r8,r8,r12 + cmplw r10,r8 + bne L(st1) + b L(end_strncmp) + +L(end_check2): + addi r12,r12,4 + cmplw r10,r8 + rlwinm r12,r12,3,0,31 + bne L(st1) + srw r6,r6,r12 + srw r9,r9,r12 + cmplw r6,r9 + bne L(st1) + +L(end_strncmp): + addi r3,r0,0 + blr + +L(st1): + mfcr r3 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/440/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/440/Implies new file mode 100644 index 0000000000..70c0d2eda3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/440/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/405/fpu +powerpc/powerpc32/405 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/464/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/464/Implies new file mode 100644 index 0000000000..c3e52c5504 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/464/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/440/fpu +powerpc/powerpc32/440 diff 
--git a/REORG.TODO/sysdeps/powerpc/powerpc32/476/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/476/Implies new file mode 100644 index 0000000000..2829f9ccaf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/476/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/464/fpu +powerpc/powerpc32/464 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/476/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/476/memset.S new file mode 100644 index 0000000000..17c91238e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/476/memset.S @@ -0,0 +1,152 @@ +/* Optimized memset for PowerPC476 (128-byte cacheline). + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memset + + r3:destination address and return address + r4:source integer to copy + r5:byte count + r11:sources integer to copy in all 32 bits of reg + r12:temp return address + + Save return address in r12 + If destinationn is unaligned and count is greater tha 255 bytes + set 0-3 bytes to make destination aligned + If count is greater tha 255 bytes and setting zero to memory + use dbcz to set memeory when we can + otherwsie do the follwoing + If 16 or more words to set we use 16 word copy loop. + Finaly we set 0-15 extra bytes with string store. 
*/ + +EALIGN (memset, 5, 0) + rlwinm r11,r4,0,24,31 + rlwimi r11,r4,8,16,23 + rlwimi r11,r11,16,0,15 + addi r12,r3,0 + cmpwi r5,0x00FF + ble L(preword8_count_loop) + cmpwi r4,0x00 + beq L(use_dcbz) + neg r6,r3 + clrlwi. r6,r6,30 + beq L(preword8_count_loop) + addi r8,0,1 + mtctr r6 + subi r3,r3,1 + +L(unaligned_bytecopy_loop): + stbu r11,0x1(r3) + subf. r5,r8,r5 + beq L(end_memset) + bdnz L(unaligned_bytecopy_loop) + addi r3,r3,1 + +L(preword8_count_loop): + srwi. r6,r5,4 + beq L(preword2_count_loop) + mtctr r6 + addi r3,r3,-4 + mr r8,r11 + mr r9,r11 + mr r10,r11 + +L(word8_count_loop_no_dcbt): + stwu r8,4(r3) + stwu r9,4(r3) + subi r5,r5,0x10 + stwu r10,4(r3) + stwu r11,4(r3) + bdnz L(word8_count_loop_no_dcbt) + addi r3,r3,4 + +L(preword2_count_loop): + clrlwi. r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + +L(end_memset): + addi r3,r12,0 + blr + +L(use_dcbz): + neg r6,r3 + clrlwi. r7,r6,28 + beq L(skip_string_loop) + mr r8,r11 + mr r9,r11 + mr r10,r11 + subf r5,r7,r5 + mtxer r7 + stswx r8,0,r3 + add r3,r3,r7 + +L(skip_string_loop): + clrlwi r8,r6,25 + srwi. r8,r8,4 + beq L(dcbz_pre_loop) + mtctr r8 + +L(word_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(word_loop) + +L(dcbz_pre_loop): + srwi r6,r5,7 + mtctr r6 + addi r7,0,0 + +L(dcbz_loop): + dcbz r3,r7 + addi r3,r3,0x80 + subi r5,r5,0x80 + bdnz L(dcbz_loop) + srwi. r6,r5,4 + beq L(postword2_count_loop) + mtctr r6 + +L(postword8_count_loop): + stw r11,0(r3) + subi r5,r5,0x10 + stw r11,4(r3) + stw r11,8(r3) + stw r11,12(r3) + addi r3,r3,0x10 + bdnz L(postword8_count_loop) + +L(postword2_count_loop): + clrlwi. 
r7,r5,28 + beq L(end_memset) + mr r8,r11 + mr r9,r11 + mr r10,r11 + mtxer r7 + stswx r8,0,r3 + b L(end_memset) +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/970/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/970/Implies new file mode 100644 index 0000000000..17139bf21c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/970/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power4/fpu +powerpc/powerpc32/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/Implies new file mode 100644 index 0000000000..39a34c5f57 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/Implies @@ -0,0 +1 @@ +wordsize-32 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/Makefile new file mode 100644 index 0000000000..cf620c8269 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/Makefile @@ -0,0 +1,49 @@ +# Powerpc32 specific build options. + +# Some Powerpc32 variants assume soft-fp is the default even though there is +# an fp variant so provide -mhard-float if --with-fp is explicitly passed. + +ifeq ($(with-fp),yes) ++cflags += -mhard-float +ASFLAGS += -mhard-float +sysdep-LDFLAGS += -mhard-float +endif + +ifeq ($(subdir),gmon) +sysdep_routines += ppc-mcount compat-ppc-mcount +static-only-routines += ppc-mcount +shared-only-routines += compat-ppc-mcount +endif + +ifeq ($(subdir),misc) +sysdep_routines += gprsave0 gprrest0 gprsave1 gprrest1 +endif + +# On PPC, -fpic works until the GOT contains 32768 bytes, and possibly +# more depending on how clever the linker is. Each GOT entry takes 4 bytes, +# so that's at least 8192 entries. Since libc only uses about 2000 entries, +# we want to use -fpic, because this generates fewer relocs. 
+ifeq (yes,$(build-shared)) +pic-ccflag = -fpic +endif + +ifeq ($(subdir),csu) +# There is no benefit to using sdata for these objects, and the user +# of the library should be able to control what goes into sdata. +CFLAGS-init.o = -G0 +CFLAGS-gmon-start.o = -G0 + +ifeq (yes,$(build-shared)) +# Compatibility +ifeq (yes,$(have-protected)) +CPPFLAGS-libgcc-compat.S = -DHAVE_DOT_HIDDEN +endif +sysdep_routines += libgcc-compat +shared-only-routines += libgcc-compat +endif +endif + +ifeq ($(subdir),elf) +# extra shared linker files to link only into dl-allobjs.so +sysdep-rtld-routines += dl-start +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/Versions b/REORG.TODO/sysdeps/powerpc/powerpc32/Versions new file mode 100644 index 0000000000..b0782fecd4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/Versions @@ -0,0 +1,40 @@ +libc { + GLIBC_2.0 { + # Functions from libgcc. + __divdi3; __moddi3; __udivdi3; __umoddi3; + __cmpdi2; __ucmpdi2; + __ashldi3; __ashrdi3; __lshrdi3; + __fixdfdi; __fixunsdfdi; + __fixsfdi; __fixunssfdi; + __floatdidf; __floatdisf; + } + GLIBC_2.16 { + __mcount_internal; + } + GLIBC_PRIVATE { + __mcount_internal; + } +} + +libm { + GLIBC_2.2 { + # Special functions to save and restore registers used by the + # runtime libraries. 
+ _restgpr0_13; _restgpr0_14; _restgpr0_15; _restgpr0_16; _restgpr0_17; + _restgpr0_18; _restgpr0_19; _restgpr0_20; _restgpr0_21; _restgpr0_22; + _restgpr0_22; _restgpr0_23; _restgpr0_24; _restgpr0_25; _restgpr0_26; + _restgpr0_27; _restgpr0_28; _restgpr0_29; _restgpr0_30; _restgpr0_31; + _savegpr0_13; _savegpr0_14; _savegpr0_15; _savegpr0_16; _savegpr0_17; + _savegpr0_18; _savegpr0_19; _savegpr0_20; _savegpr0_21; _savegpr0_22; + _savegpr0_22; _savegpr0_23; _savegpr0_24; _savegpr0_25; _savegpr0_26; + _savegpr0_27; _savegpr0_28; _savegpr0_29; _savegpr0_30; _savegpr0_31; + _restgpr1_13; _restgpr1_14; _restgpr1_15; _restgpr1_16; _restgpr1_17; + _restgpr1_18; _restgpr1_19; _restgpr1_20; _restgpr1_21; _restgpr1_22; + _restgpr1_22; _restgpr1_23; _restgpr1_24; _restgpr1_25; _restgpr1_26; + _restgpr1_27; _restgpr1_28; _restgpr1_29; _restgpr1_30; _restgpr1_31; + _savegpr1_13; _savegpr1_14; _savegpr1_15; _savegpr1_16; _savegpr1_17; + _savegpr1_18; _savegpr1_19; _savegpr1_20; _savegpr1_21; _savegpr1_22; + _savegpr1_22; _savegpr1_23; _savegpr1_24; _savegpr1_25; _savegpr1_26; + _savegpr1_27; _savegpr1_28; _savegpr1_29; _savegpr1_30; _savegpr1_31; + } +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp-common.S new file mode 100644 index 0000000000..4b60a2f9a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp-common.S @@ -0,0 +1,82 @@ +/* longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + +#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT) +# define LOAD_GP(N) evldd r##N,((JB_FPRS+((N)-14)*2)*4)(r3) +#else +# define LOAD_GP(N) lwz r##N,((JB_GPRS+(N)-14)*4)(r3) +#endif + +ENTRY (__longjmp_symbol) + +#if defined PTR_DEMANGLE || defined CHECK_SP + lwz r24,(JB_GPR1*4)(r3) +# ifdef CHECK_SP +# ifdef PTR_DEMANGLE + PTR_DEMANGLE3 (r24, r24, r25) +# endif + CHECK_SP (r24) + mr r1,r24 +# endif +#else + lwz r1,(JB_GPR1*4)(r3) +#endif + lwz r0,(JB_LR*4)(r3) + LOAD_GP (14) + LOAD_GP (15) + LOAD_GP (16) + LOAD_GP (17) + LOAD_GP (18) + LOAD_GP (19) + LOAD_GP (20) +#ifdef PTR_DEMANGLE +# ifndef CHECK_SP + PTR_DEMANGLE3 (r1, r24, r25) +# endif + PTR_DEMANGLE2 (r0, r25) +#endif + /* longjmp/longjmp_target probe expects longjmp first argument (4@3), + second argument (-4@4), and target address (4@0), respectively. 
*/ + LIBC_PROBE (longjmp, 3, 4@3, -4@4, 4@0) + mtlr r0 + LOAD_GP (21) + LOAD_GP (22) + lwz r5,(JB_CR*4)(r3) + LOAD_GP (23) + LOAD_GP (24) + LOAD_GP (25) + mtcrf 0xFF,r5 + LOAD_GP (26) + LOAD_GP (27) + LOAD_GP (28) + LOAD_GP (29) + LOAD_GP (30) + LOAD_GP (31) + LIBC_PROBE (longjmp_target, 3, 4@3, -4@4, 4@0) + mr r3,r4 + blr +END (__longjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp.S new file mode 100644 index 0000000000..42127c6ff2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/__longjmp.S @@ -0,0 +1,39 @@ +/* AltiVec/VMX (new) version of __longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. 
*/ +# define __longjmp_symbol __longjmp +# include "__longjmp-common.S" + +#else /* IS_IN (libc) */ +strong_alias (__vmx__longjmp, __longjmp); +# define __longjmp_symbol __vmx__longjmp +# include "__longjmp-common.S" + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef JB_SIZE +# undef __longjmp_symbol +# define __longjmp_symbol __novmx__longjmp +# include "__longjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/a2/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/a2/memcpy.S new file mode 100644 index 0000000000..c795ff48fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/a2/memcpy.S @@ -0,0 +1,527 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Michael Brutman <brutman@us.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define PREFETCH_AHEAD 4 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ + + .machine a2 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmplwi cr1,r5,16 /* is size < 16 ? 
*/ + mr r6,r3 /* Copy dest reg to r6; */ + blt+ cr1,L(shortcopy) + + + /* Big copy (16 bytes or more) + + Figure out how far to the nearest quadword boundary, or if we are + on one already. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + neg r8,r3 /* LS 4 bits = # bytes to 8-byte dest bdry */ + clrlwi r8,r8,32-4 /* align to 16byte boundary */ + sub r7,r4,r3 /* compute offset to src from dest */ + cmplwi cr0,r8,0 /* Were we aligned on a 16 byte bdy? */ + beq+ L(dst_aligned) + + + + /* Destination is not aligned on quadword boundary. Get us to one. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + r7 - offset to src from dest + r8 - number of bytes to quadword boundary + */ + + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 /* adjust remaining len */ + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte addr */ + stb r0,0(r6) + addi r6,r6,1 +1: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte addr */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte addr */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+0,8f + lfdx r0,r7,r6 /* copy 8 byte addr */ + stfd r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 /* update src addr */ + + + + /* Dest is quadword aligned now. + + Lots of decisions to make. If we are copying less than a cache + line we won't be here long. If we are not on a cache line + boundary we need to get there. And then we need to figure out + how many cache lines ahead to pre-touch. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + + .align 4 +L(dst_aligned): + + +#ifdef SHARED + mflr r0 +/* Establishes GOT addressability so we can load __cache_line_size + from static. This value was set from the aux vector during startup. 
*/ + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,__cache_line_size-got_label@ha + lwz r9,__cache_line_size-got_label@l(r9) + mtlr r0 +#else +/* Load __cache_line_size from static. This value was set from the + aux vector during startup. */ + lis r9,__cache_line_size@ha + lwz r9,__cache_line_size@l(r9) +#endif + + cmplwi cr5, r9, 0 + bne+ cr5,L(cachelineset) + +/* __cache_line_size not set: generic byte copy without much optimization */ + andi. r0,r5,1 /* If length is odd copy one byte. */ + beq L(cachelinenotset_align) + lbz r7,0(r4) /* Read one byte from source. */ + addi r5,r5,-1 /* Update length. */ + addi r4,r4,1 /* Update source pointer address. */ + stb r7,0(r6) /* Store one byte on dest. */ + addi r6,r6,1 /* Update dest pointer address. */ +L(cachelinenotset_align): + cmpwi cr7,r5,0 /* If length is 0 return. */ + beqlr cr7 + ori r2,r2,0 /* Force a new dispatch group. */ +L(cachelinenotset_loop): + addic. r5,r5,-2 /* Update length. */ + lbz r7,0(r4) /* Load 2 bytes from source. */ + lbz r8,1(r4) + addi r4,r4,2 /* Update source pointer address. */ + stb r7,0(r6) /* Store 2 bytes on dest. */ + stb r8,1(r6) + addi r6,r6,2 /* Update dest pointer address. */ + bne L(cachelinenotset_loop) + blr + + +L(cachelineset): + + addi r10,r9,-1 + + cmpw cr5,r5,r10 /* Less than a cacheline to go? */ + + neg r7,r6 /* How far to next cacheline bdy? */ + + addi r6,r6,-8 /* prepare for stdu */ + cmpwi cr0,r9,128 + addi r4,r4,-8 /* prepare for ldu */ + + + ble+ cr5,L(lessthancacheline) + + beq- cr0,L(big_lines) /* 128 byte line code */ + + + + + /* More than a cacheline left to go, and using 64 byte cachelines */ + + clrlwi r7,r7,32-6 /* How far to next cacheline bdy? */ + + cmplwi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srwi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? 
*/ + srwi r10,r5,6 + + cmplwi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmplwi r10,PREFETCH_AHEAD + li r12,64+8 /* prefetch distance */ + ble L(lessthanmaxprefetch) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC): + dcbt r12,r4 + addi r12,r12,64 + bdnz L(prefetchSRC) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch): + mtctr r7 + + cmplwi cr1,r5,64 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? 
*/ + clrlwi r5,r5,32-6 + + beq cr6,L(cachelinealigned) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline): + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz L(aligntocacheline) + + + .align 4 +L(cachelinealigned): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <64 */ + +L(outerloop): + cmpwi r11,0 + mtctr r11 + beq- L(endloop) + + li r11,64*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + lfd fp9, 0x08(r4) + dcbz r11,r6 + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfdu fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfdu fp12, 0x40(r6) + + bdnz L(loop) + + +L(endloop): + cmpwi r10,0 + beq- L(endloop2) + mtctr r10 + +L(loop2): /* Copy aligned body */ + lfd fp9, 0x08(r4) + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfdu fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfdu fp12, 0x40(r6) + + bdnz L(loop2) +L(endloop2): + + + .align 4 +L(lessthancacheline): /* Was there less than cache to do ? */ + cmplwi cr0,r5,16 + srwi r7,r5,4 /* divide size by 16 */ + blt- L(do_lt16) + mtctr r7 + +L(copy_remaining): + lfd fp9, 0x08(r4) + lfdu fp10, 0x10(r4) + stfd fp9, 0x08(r6) + stfdu fp10, 0x10(r6) + bdnz L(copy_remaining) + +L(do_lt16): /* less than 16 ? 
*/ + cmplwi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +L(shortcopy): /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + lfdx fp9,r7,r6 /* copy 8 byte */ + stfd fp9,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: + blr + + + + + + /* Similar to above, but for use with 128 byte lines. */ + + +L(big_lines): + + clrlwi r7,r7,32-7 /* How far to next cacheline bdy? */ + + cmplwi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srwi r7,r7,4 /* How many qw to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srwi r10,r5,7 + + cmplwi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch_128) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmplwi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble L(lessthanmaxprefetch_128) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch_128): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC_128): + dcbt r12,r4 + addi r12,r12,128 + bdnz L(prefetchSRC_128) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? 
+ r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch_128): + mtctr r7 + + cmplwi cr1,r5,128 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrlwi r5,r5,32-7 + + beq cr6,L(cachelinealigned_128) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline_128): + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz L(aligntocacheline_128) + + +L(cachelinealigned_128): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <128 */ + +L(outerloop_128): + cmpwi r11,0 + mtctr r11 + beq- L(endloop_128) + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop_128): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + lfd fp9, 0x08(r4) + dcbz r11,r6 + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfd fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfd fp12, 0x40(r6) + lfd fp9, 0x48(r4) + lfd fp10, 0x50(r4) + lfd fp11, 0x58(r4) + lfd fp12, 0x60(r4) + stfd fp9, 0x48(r6) + stfd fp10, 0x50(r6) + stfd fp11, 0x58(r6) + stfd fp12, 0x60(r6) + lfd fp9, 0x68(r4) + lfd fp10, 0x70(r4) + lfd fp11, 0x78(r4) + lfdu fp12, 0x80(r4) + stfd fp9, 0x68(r6) + stfd fp10, 0x70(r6) + stfd fp11, 0x78(r6) + stfdu fp12, 0x80(r6) + + bdnz L(loop_128) + + +L(endloop_128): + cmpwi r10,0 + beq- L(endloop2_128) + mtctr r10 + +L(loop2_128): /* Copy aligned body */ + lfd fp9, 0x08(r4) + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfd fp12, 0x40(r4) + stfd 
fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfd fp12, 0x40(r6) + lfd fp9, 0x48(r4) + lfd fp10, 0x50(r4) + lfd fp11, 0x58(r4) + lfd fp12, 0x60(r4) + stfd fp9, 0x48(r6) + stfd fp10, 0x50(r6) + stfd fp11, 0x58(r6) + stfd fp12, 0x60(r6) + lfd fp9, 0x68(r4) + lfd fp10, 0x70(r4) + lfd fp11, 0x78(r4) + lfdu fp12, 0x80(r4) + stfd fp9, 0x68(r6) + stfd fp10, 0x70(r6) + stfd fp11, 0x78(r6) + stfdu fp12, 0x80(r6) + bdnz L(loop2_128) +L(endloop2_128): + + b L(lessthancacheline) + + +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/add_n.S b/REORG.TODO/sysdeps/powerpc/powerpc32/add_n.S new file mode 100644 index 0000000000..0687be6236 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/add_n.S @@ -0,0 +1,68 @@ +/* Add two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1+s2 and put result in res_ptr; return carry, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... 
*/ + +EALIGN (__mpn_add_n, 3, 0) + +/* Set up for loop below. */ + mtcrf 0x01,r6 + srwi. r7,r6,1 + li r10,0 + mtctr r7 + bt 31,L(2) + +/* Clear the carry. */ + addic r0,r0,0 +/* Adjust pointers for loop. */ + addi r3,r3,-4 + addi r4,r4,-4 + addi r5,r5,-4 + b L(0) + +L(2): lwz r7,0(r5) + lwz r6,0(r4) + addc r6,r6,r7 + stw r6,0(r3) + beq L(1) + +/* The loop. */ + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). */ +L(0): lwz r9,4(r4) + lwz r8,4(r5) + lwzu r6,8(r4) + lwzu r7,8(r5) + adde r8,r9,r8 + stw r8,4(r3) + adde r6,r6,r7 + stwu r6,8(r3) + bdnz L(0) +/* Return the carry. */ +L(1): addze r3,r10 + blr +END (__mpn_add_n) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/addmul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/addmul_1.S new file mode 100644 index 0000000000..f742be21b8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/addmul_1.S @@ -0,0 +1,48 @@ +/* Multiply a limb vector by a single limb, for PowerPC. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_addmul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res+s1*s2 and put result back in res; return carry. 
*/ +ENTRY (__mpn_addmul_1) + mtctr r5 + + lwz r0,0(r4) + mullw r7,r0,r6 + mulhwu r10,r0,r6 + lwz r9,0(r3) + addc r8,r7,r9 + addi r3,r3,-4 /* adjust res_ptr */ + bdz L(1) + +L(0): lwzu r0,4(r4) + stwu r8,4(r3) + mullw r8,r0,r6 + adde r7,r8,r10 + mulhwu r10,r0,r6 + lwz r9,4(r3) + addze r10,r10 + addc r8,r7,r9 + bdnz L(0) + +L(1): stw r8,4(r3) + addze r3,r10 + blr +END (__mpn_addmul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/atomic-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc32/atomic-machine.h new file mode 100644 index 0000000000..96c7d81359 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/atomic-machine.h @@ -0,0 +1,126 @@ +/* Atomic operations. PowerPC32 version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction. + This is a hint to the hardware to expect additional updates adjacent + to the lock word or not. If we are acquiring a Mutex, the hint + should be true. Otherwise we releasing a Mutex or doing a simple + atomic operation. In that case we don't expect additional updates + adjacent to the lock word after the Store Conditional and the hint + should be false. 
*/ + +#if defined _ARCH_PWR6 || defined _ARCH_PWR6X +# define MUTEX_HINT_ACQ ",1" +# define MUTEX_HINT_REL ",0" +#else +# define MUTEX_HINT_ACQ +# define MUTEX_HINT_REL +#endif + +#define __HAVE_64B_ATOMICS 0 +#define USE_ATOMIC_COMPILER_BUILTINS 0 +#define ATOMIC_EXCHANGE_USES_CAS 1 + +/* + * The 32-bit exchange_bool is different on powerpc64 because the subf + * does signed 64-bit arithmetic while the lwarx is 32-bit unsigned + * (a load word and zero (high 32) form). So powerpc64 has a slightly + * different version in sysdeps/powerpc/powerpc64/atomic-machine.h. + */ +#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \ +({ \ + unsigned int __tmp; \ + __asm __volatile ( \ + "1: lwarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%2,%0\n" \ + " bne 2f\n" \ + " stwcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (mem), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +/* Powerpc32 processors don't implement the 64-bit (doubleword) forms of + load and reserve (ldarx) and store conditional (stdcx.) instructions. + So for powerpc32 we stub out the 64-bit forms. 
*/ +#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \ + (abort (), 0) + +#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ + (abort (), (__typeof (*mem)) 0) + +#define __arch_compare_and_exchange_val_64_rel(mem, newval, oldval) \ + (abort (), (__typeof (*mem)) 0) + +#define __arch_atomic_exchange_64_acq(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_64_rel(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_and_add_64(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_and_add_64_acq(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_exchange_and_add_64_rel(mem, value) \ + ({ abort (); (*mem) = (value); }) + +#define __arch_atomic_increment_val_64(mem) \ + ({ abort (); (*mem)++; }) + +#define __arch_atomic_decrement_val_64(mem) \ + ({ abort (); (*mem)--; }) + +#define __arch_atomic_decrement_if_positive_64(mem) \ + ({ abort (); (*mem)--; }) + +#ifdef _ARCH_PWR4 +/* + * Newer powerpc64 processors support the new "light weight" sync (lwsync) + * So if the build is using -mcpu=[power4,power5,power5+,970] we can + * safely use lwsync. + */ +# define atomic_read_barrier() __asm ("lwsync" ::: "memory") +/* + * "light weight" sync can also be used for the release barrier. + */ +# ifndef UP +# define __ARCH_REL_INSTR "lwsync" +# endif +# define atomic_write_barrier() __asm ("lwsync" ::: "memory") +#else +/* + * Older powerpc32 processors don't support the new "light weight" + * sync (lwsync). So the only safe option is to use normal sync + * for all powerpc32 applications. + */ +# define atomic_read_barrier() __asm ("sync" ::: "memory") +# define atomic_write_barrier() __asm ("sync" ::: "memory") +#endif + +/* + * Include the rest of the atomic ops macros which are common to both + * powerpc32 and powerpc64. 
+ */ +#include_next <atomic-machine.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/backtrace.c b/REORG.TODO/sysdeps/powerpc/powerpc32/backtrace.c new file mode 100644 index 0000000000..394062136c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/backtrace.c @@ -0,0 +1,131 @@ +/* Return backtrace of current program state. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <execinfo.h> +#include <stddef.h> +#include <string.h> +#include <signal.h> +#include <libc-vdso.h> + +/* This is the stack layout we see with every stack frame. + Note that every routine is required by the ABI to lay out the stack + like this. + + +----------------+ +-----------------+ + %r1 -> | %r1 last frame--------> | %r1 last frame--->... --> NULL + | | | | + | (unused) | | return address | + +----------------+ +-----------------+ +*/ +struct layout +{ + struct layout *next; + void *return_address; +}; + +#define SIGNAL_FRAMESIZE 64 + +/* Since the signal handler is just like any other function it needs to + save/restore its LR and it will save it into callers stack frame. + Since a signal handler doesn't have a caller, the kernel creates a + dummy frame to make it look like it has a caller. 
*/ +struct signal_frame_32 { + char dummy[SIGNAL_FRAMESIZE]; + struct sigcontext sctx; + mcontext_t mctx; + /* We don't care about the rest, since IP value is at 'mctx' field. */ +}; + +static inline int +is_sigtramp_address (void *nip) +{ +#ifdef SHARED + if (nip == VDSO_SYMBOL (sigtramp32)) + return 1; +#endif + return 0; +} + +struct rt_signal_frame_32 { + char dummy[SIGNAL_FRAMESIZE + 16]; + siginfo_t info; + struct ucontext uc; + /* We don't care about the rest, since IP value is at 'uc' field. */ +}; + +static inline int +is_sigtramp_address_rt (void * nip) +{ +#ifdef SHARED + if (nip == VDSO_SYMBOL (sigtramp_rt32)) + return 1; +#endif + return 0; +} + +int +__backtrace (void **array, int size) +{ + struct layout *current; + int count; + + /* Force gcc to spill LR. */ + asm volatile ("" : "=l"(current)); + + /* Get the address on top-of-stack. */ + asm volatile ("lwz %0,0(1)" : "=r"(current)); + + for ( count = 0; + current != NULL && count < size; + current = current->next, count++) + { + gregset_t *gregset = NULL; + + array[count] = current->return_address; + + /* Check if the symbol is the signal trampoline and get the interrupted + * symbol address from the trampoline saved area. */ + if (is_sigtramp_address (current->return_address)) + { + struct signal_frame_32 *sigframe = + (struct signal_frame_32*) current; + gregset = &sigframe->mctx.gregs; + } + else if (is_sigtramp_address_rt (current->return_address)) + { + struct rt_signal_frame_32 *sigframe = + (struct rt_signal_frame_32*) current; + gregset = &sigframe->uc.uc_mcontext.uc_regs->gregs; + } + if (gregset) + { + array[++count] = (void*)((*gregset)[PT_NIP]); + current = (void*)((*gregset)[PT_R1]); + } + } + + /* It's possible the second-last stack frame can't return + (that is, it's __libc_start_main), in which case + the CRT startup code will have set its LR to 'NULL'. 
*/ + if (count > 0 && array[count-1] == NULL) + count--; + + return count; +} +weak_alias (__backtrace, backtrace) +libc_hidden_def (__backtrace) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bits/wordsize.h b/REORG.TODO/sysdeps/powerpc/powerpc32/bits/wordsize.h new file mode 100644 index 0000000000..04ca9debf0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bits/wordsize.h @@ -0,0 +1,11 @@ +/* Determine the wordsize from the preprocessor defines. */ + +#if defined __powerpc64__ +# define __WORDSIZE 64 +# define __WORDSIZE_TIME64_COMPAT32 1 +#else +# define __WORDSIZE 32 +# define __WORDSIZE_TIME64_COMPAT32 0 +# define __WORDSIZE32_SIZE_ULONG 0 +# define __WORDSIZE32_PTRDIFF_LONG 0 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-_setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-_setjmp.S new file mode 100644 index 0000000000..169766c304 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-_setjmp.S @@ -0,0 +1,56 @@ +/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. PowerPC32/64 version. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#include <shlib-compat.h> +#include <libc-symbols.h> +#include <sysdep.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. 
*/ +ENTRY (_setjmp) + li r4,0 /* Set second argument to 0. */ + b __sigsetjmp@local +END (_setjmp) +libc_hidden_def (_setjmp) +#else +/* Build a versioned object for libc. */ + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_0); + +ENTRY (__novmx_setjmp) + li r4,0 /* Set second argument to 0. */ + b __novmx__sigsetjmp@local +END (__novmx_setjmp) +libc_hidden_def (__novmx_setjmp) +# endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */ + +versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4) +/* __GI__setjmp prototype is needed for ntpl i.e. _setjmp is defined + as a libc_hidden_proto & is used in sysdeps/generic/libc-start.c + if HAVE_CLEANUP_JMP_BUF is defined */ +ENTRY (__GI__setjmp) + li r4,0 /* Set second argument to 0. */ + b __vmx__sigsetjmp@local +END (__GI__setjmp) + +ENTRY (__vmx_setjmp) + li r4,0 /* Set second argument to 0. */ + b __vmx__sigsetjmp@local +END (__vmx_setjmp) +libc_hidden_def (__vmx_setjmp) +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-setjmp.S new file mode 100644 index 0000000000..212d6ce8b1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bsd-setjmp.S @@ -0,0 +1,39 @@ +/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. PowerPC32/64 version. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#include <shlib-compat.h> +#include <libc-symbols.h> +#include <sysdep.h> + +#if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) + +ENTRY (__novmxsetjmp) + li r4,1 /* Set second argument to 1. */ + b __novmx__sigsetjmp@local +END (__novmxsetjmp) +strong_alias (__novmxsetjmp, __novmx__setjmp) +compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_0) + +#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) ) */ + +ENTRY (__vmxsetjmp) + li r4,1 /* Set second argument to 1. */ + b __vmx__sigsetjmp@local +END (__vmxsetjmp) +strong_alias (__vmxsetjmp, __vmx__setjmp) +strong_alias (__vmx__setjmp, __setjmp) +versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/bzero.S b/REORG.TODO/sysdeps/powerpc/powerpc32/bzero.S new file mode 100644 index 0000000000..2638b12db0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/bzero.S @@ -0,0 +1,27 @@ +/* Optimized bzero `implementation' for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +ENTRY (__bzero) + + mr r5,r4 + li r4,0 + b memset@local +END (__bzero) +weak_alias (__bzero, bzero) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/cell/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/cell/memcpy.S new file mode 100644 index 0000000000..a7f761408a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/cell/memcpy.S @@ -0,0 +1,242 @@ +/* Optimized memcpy implementation for CELL BE PowerPC. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define PREFETCH_AHEAD 6 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 4 /* no cache lines DST zeroing ahead */ + +/* memcpy routine optimized for CELL-BE-PPC v2.0 + * + * The CELL PPC core has 1 integer unit and 1 load/store unit + * CELL: + * 1st level data cache = 32K + * 2nd level data cache = 512K + * 3rd level data cache = 0K + * With 3.2 GHz clockrate the latency to 2nd level cache is >36 clocks, + * latency to memory is >400 clocks + * To improve copy performance we need to prefetch source data + * far ahead to hide this latency + * For best performance instruction forms ending in "." like "andi." + * should be avoided as the are implemented in microcode on CELL. 
+ * The below code is loop unrolled for the CELL cache line of 128 bytes + */ + +.align 7 + +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmplwi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 + blt+ cr1,.Lshortcopy + +.Lbigcopy: + neg r8,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + clrlwi r8,r8,32-4 /* align to 16byte boundary */ + sub r7,r4,r3 + cmplwi cr0,r8,0 + beq+ .Ldst_aligned + +.Ldst_unaligned: + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) + addi r6,r6,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: bf cr7*4+0,8f + lfdx fp9,r7,r6 /* copy 8 byte */ + stfd fp9,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 + +.Ldst_aligned: + + cmpwi cr5,r5,128-1 + + neg r7,r6 + addi r6,r6,-8 /* prepare for stfdu */ + addi r4,r4,-8 /* prepare for lfdu */ + + clrlwi r7,r7,32-7 /* align to cacheline boundary */ + ble+ cr5,.Llessthancacheline + + cmplwi cr6,r7,0 + subf r5,r7,r5 + srwi r7,r7,4 /* divide size by 16 */ + srwi r10,r5,7 /* number of cache lines to copy */ + + cmplwi r10,0 + li r11,0 /* number cachelines to copy with prefetch */ + beq .Lnocacheprefetch + + cmplwi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble .Llessthanmaxprefetch + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +.Llessthanmaxprefetch: + mtctr r10 + +.LprefetchSRC: + dcbt r12,r4 + addi r12,r12,128 + bdnz .LprefetchSRC + +.Lnocacheprefetch: + mtctr r7 + cmplwi cr1,r5,128 + clrlwi r5,r5,32-7 + beq cr6,.Lcachelinealigned + +.Laligntocacheline: + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz .Laligntocacheline + + +.Lcachelinealigned: /* copy while cache lines */ + + blt- cr1,.Llessthancacheline /* size <128 */ + +.Louterloop: + cmpwi r11,0 + mtctr r11 + beq- .Lendloop + + li r11,128*ZERO_AHEAD +8 
/* DCBZ dist */ + +.align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +.Lloop: /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + lfd fp9, 0x08(r4) + dcbz r11,r6 + lfd fp10, 0x10(r4) /* 4 register stride copy is optimal */ + lfd fp11, 0x18(r4) /* to hide 1st level cache latency. */ + lfd fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfd fp12, 0x20(r6) + lfd fp9, 0x28(r4) + lfd fp10, 0x30(r4) + lfd fp11, 0x38(r4) + lfd fp12, 0x40(r4) + stfd fp9, 0x28(r6) + stfd fp10, 0x30(r6) + stfd fp11, 0x38(r6) + stfd fp12, 0x40(r6) + lfd fp9, 0x48(r4) + lfd fp10, 0x50(r4) + lfd fp11, 0x58(r4) + lfd fp12, 0x60(r4) + stfd fp9, 0x48(r6) + stfd fp10, 0x50(r6) + stfd fp11, 0x58(r6) + stfd fp12, 0x60(r6) + lfd fp9, 0x68(r4) + lfd fp10, 0x70(r4) + lfd fp11, 0x78(r4) + lfdu fp12, 0x80(r4) + stfd fp9, 0x68(r6) + stfd fp10, 0x70(r6) + stfd fp11, 0x78(r6) + stfdu fp12, 0x80(r6) + + bdnz .Lloop + +.Lendloop: + cmpwi r10,0 + slwi r10,r10,2 /* adjust from 128 to 32 byte stride */ + beq- .Lendloop2 + mtctr r10 + +.Lloop2: /* Copy aligned body */ + lfd fp9, 0x08(r4) + lfd fp10, 0x10(r4) + lfd fp11, 0x18(r4) + lfdu fp12, 0x20(r4) + stfd fp9, 0x08(r6) + stfd fp10, 0x10(r6) + stfd fp11, 0x18(r6) + stfdu fp12, 0x20(r6) + + bdnz .Lloop2 +.Lendloop2: + +.Llessthancacheline: /* less than cache to do ? */ + cmplwi cr0,r5,16 + srwi r7,r5,4 /* divide size by 16 */ + blt- .Ldo_lt16 + mtctr r7 + +.Lcopy_remaining: + lfd fp9,0x08(r4) + lfdu fp10,0x10(r4) + stfd fp9,0x08(r6) + stfdu fp10,0x10(r6) + bdnz .Lcopy_remaining + +.Ldo_lt16: /* less than 16 ? 
*/ + cmplwi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +.Lshortcopy: /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + lfdx fp9,r7,r6 /* copy 8 byte */ + stfd fp9,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: blr + +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/compat-ppc-mcount.S b/REORG.TODO/sysdeps/powerpc/powerpc32/compat-ppc-mcount.S new file mode 100644 index 0000000000..2a9cb24072 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/compat-ppc-mcount.S @@ -0,0 +1,11 @@ +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_15) + + compat_text_section +# define _mcount __compat_mcount +# include "ppc-mcount.S" +# undef _mcount + +compat_symbol (libc, __compat_mcount, _mcount, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/configure b/REORG.TODO/sysdeps/powerpc/powerpc32/configure new file mode 100644 index 0000000000..29cfd53e8b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/configure @@ -0,0 +1,29 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/powerpc/powerpc32. + +# See whether GCC uses -msecure-plt. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -msecure-plt by default" >&5 +$as_echo_n "checking for -msecure-plt by default... 
" >&6; } +if ${libc_cv_ppc_secure_plt+:} false; then : + $as_echo_n "(cached) " >&6 +else + echo 'int foo (void) { extern int bar; return bar; }' > conftest.c +libc_cv_ppc_secure_plt=no +if { ac_try='${CC-cc} -S $CFLAGS conftest.c -fpic -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '_GLOBAL_OFFSET_TABLE_-.*@ha' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_secure_plt=yes + fi +fi +rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc_secure_plt" >&5 +$as_echo "$libc_cv_ppc_secure_plt" >&6; } +if test $libc_cv_ppc_secure_plt = yes; then + $as_echo "#define HAVE_PPC_SECURE_PLT 1" >>confdefs.h + +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/configure.ac b/REORG.TODO/sysdeps/powerpc/powerpc32/configure.ac new file mode 100644 index 0000000000..5d3a9b509d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/configure.ac @@ -0,0 +1,16 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/powerpc/powerpc32. + +# See whether GCC uses -msecure-plt. 
+AC_CACHE_CHECK(for -msecure-plt by default, libc_cv_ppc_secure_plt, [dnl +echo 'int foo (void) { extern int bar; return bar; }' > conftest.c +libc_cv_ppc_secure_plt=no +if AC_TRY_COMMAND(${CC-cc} -S $CFLAGS conftest.c -fpic -o conftest.s 1>&AS_MESSAGE_LOG_FD); then + if grep '_GLOBAL_OFFSET_TABLE_-.*@ha' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_secure_plt=yes + fi +fi +rm -rf conftest*]) +if test $libc_cv_ppc_secure_plt = yes; then + AC_DEFINE(HAVE_PPC_SECURE_PLT) +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/crti.S b/REORG.TODO/sysdeps/powerpc/powerpc32/crti.S new file mode 100644 index 0000000000..50b02630c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/crti.S @@ -0,0 +1,89 @@ +/* Special .init and .fini section support for PowerPC. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. 
The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. */ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + + .section .init,"ax",@progbits + .align 2 + .globl _init + .type _init, @function +_init: + stwu r1, -16(r1) + mflr r0 + stw r0, 20(r1) + stw r30, 8(r1) + SETUP_GOT_ACCESS (r30, .Lgot_label_i) + addis r30, r30, _GLOBAL_OFFSET_TABLE_-.Lgot_label_i@ha + addi r30, r30, _GLOBAL_OFFSET_TABLE_-.Lgot_label_i@l +#if PREINIT_FUNCTION_WEAK + lwz r0, PREINIT_FUNCTION@got(r30) + cmpwi cr7, r0, 0 + beq+ cr7, 1f + bl PREINIT_FUNCTION@plt +1: +#else + bl PREINIT_FUNCTION@local +#endif + + .section .fini,"ax",@progbits + .align 2 + .globl _fini + .type _fini, @function +_fini: + stwu r1, -16(r1) + mflr r0 + stw r0, 20(r1) + stw r30, 8(r1) + SETUP_GOT_ACCESS (r30, .Lgot_label_f) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/crtn.S b/REORG.TODO/sysdeps/powerpc/powerpc32/crtn.S new file 
mode 100644 index 0000000000..67be2950fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/crtn.S @@ -0,0 +1,53 @@ +/* Special .init and .fini section support for PowerPC. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. 
*/ + +#include <sysdep.h> + + .section .init,"ax",@progbits + lwz r0, 20(r1) + mtlr r0 + lwz r30, 8(r1) + addi r1, r1, 16 + blr + + .section .fini,"ax",@progbits + lwz r0, 20(r1) + mtlr r0 + lwz r30, 8(r1) + addi r1, r1, 16 + blr diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-dtprocnum.h b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-dtprocnum.h new file mode 100644 index 0000000000..7fe2be7939 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-dtprocnum.h @@ -0,0 +1,3 @@ +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_PPC_NUM diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-irel.h b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-irel.h new file mode 100644 index 0000000000..e5d6540ce4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-irel.h @@ -0,0 +1,52 @@ +/* Machine-dependent ELF indirect relocation inline functions. + PowerPC version. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _DL_IREL_H +#define _DL_IREL_H + +#include <stdio.h> +#include <unistd.h> +#include <ldsodefs.h> + +#define ELF_MACHINE_IRELA 1 + +static inline Elf32_Addr +__attribute ((always_inline)) +elf_ifunc_invoke (Elf32_Addr addr) +{ + return ((Elf32_Addr (*) (unsigned long int)) (addr)) (GLRO(dl_hwcap)); +} + +static inline void +__attribute ((always_inline)) +elf_irela (const Elf32_Rela *reloc) +{ + unsigned int r_type = ELF32_R_TYPE (reloc->r_info); + + if (__glibc_likely (r_type == R_PPC_IRELATIVE)) + { + Elf32_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf32_Addr value = elf_ifunc_invoke(reloc->r_addend); + *reloc_addr = value; + } + else + __libc_fatal ("unexpected reloc type in static binary"); +} + +#endif /* dl-irel.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.c b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.c new file mode 100644 index 0000000000..2d6a576552 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.c @@ -0,0 +1,608 @@ +/* Machine-dependent ELF dynamic relocation functions. PowerPC version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <unistd.h> +#include <string.h> +#include <sys/param.h> +#include <link.h> +#include <ldsodefs.h> +#include <elf/dynamic-link.h> +#include <dl-machine.h> +#include <_itoa.h> + +/* The value __cache_line_size is defined in dl-sysdep.c and is initialised + by _dl_sysdep_start via DL_PLATFORM_INIT. */ +extern int __cache_line_size attribute_hidden; + + +/* Stuff for the PLT. */ +#define PLT_INITIAL_ENTRY_WORDS 18 +#define PLT_LONGBRANCH_ENTRY_WORDS 0 +#define PLT_TRAMPOLINE_ENTRY_WORDS 6 +#define PLT_DOUBLE_SIZE (1<<13) +#define PLT_ENTRY_START_WORDS(entry_number) \ + (PLT_INITIAL_ENTRY_WORDS + (entry_number)*2 \ + + ((entry_number) > PLT_DOUBLE_SIZE \ + ? ((entry_number) - PLT_DOUBLE_SIZE)*2 \ + : 0)) +#define PLT_DATA_START_WORDS(num_entries) PLT_ENTRY_START_WORDS(num_entries) + +/* Macros to build PowerPC opcode words. */ +#define OPCODE_ADDI(rd,ra,simm) \ + (0x38000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff)) +#define OPCODE_ADDIS(rd,ra,simm) \ + (0x3c000000 | (rd) << 21 | (ra) << 16 | ((simm) & 0xffff)) +#define OPCODE_ADD(rd,ra,rb) \ + (0x7c000214 | (rd) << 21 | (ra) << 16 | (rb) << 11) +#define OPCODE_B(target) (0x48000000 | ((target) & 0x03fffffc)) +#define OPCODE_BA(target) (0x48000002 | ((target) & 0x03fffffc)) +#define OPCODE_BCTR() 0x4e800420 +#define OPCODE_LWZ(rd,d,ra) \ + (0x80000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff)) +#define OPCODE_LWZU(rd,d,ra) \ + (0x84000000 | (rd) << 21 | (ra) << 16 | ((d) & 0xffff)) +#define OPCODE_MTCTR(rd) (0x7C0903A6 | (rd) << 21) +#define OPCODE_RLWINM(ra,rs,sh,mb,me) \ + (0x54000000 | (rs) << 21 | (ra) << 16 | (sh) << 11 | (mb) << 6 | (me) << 1) + +#define OPCODE_LI(rd,simm) OPCODE_ADDI(rd,0,simm) +#define OPCODE_ADDIS_HI(rd,ra,value) \ + OPCODE_ADDIS(rd,ra,((value) + 0x8000) >> 16) +#define OPCODE_LIS_HI(rd,value) OPCODE_ADDIS_HI(rd,0,value) +#define OPCODE_SLWI(ra,rs,sh) OPCODE_RLWINM(ra,rs,sh,0,31-sh) + + +#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") +#define 
PPC_SYNC asm volatile ("sync" : : : "memory") +#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory") +#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory") +#define PPC_DIE asm volatile ("tweq 0,0") + +/* Use this when you've modified some code, but it won't be in the + instruction fetch queue (or when it doesn't matter if it is). */ +#define MODIFIED_CODE_NOQUEUE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0) +/* Use this when it might be in the instruction queue. */ +#define MODIFIED_CODE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0) + + +/* The idea here is that to conform to the ABI, we are supposed to try + to load dynamic objects between 0x10000 (we actually use 0x40000 as + the lower bound, to increase the chance of a memory reference from + a null pointer giving a segfault) and the program's load address; + this may allow us to use a branch instruction in the PLT rather + than a computed jump. The address is only used as a preference for + mmap, so if we get it wrong the worst that happens is that it gets + mapped somewhere else. */ + +ElfW(Addr) +__elf_preferred_address (struct link_map *loader, size_t maplength, + ElfW(Addr) mapstartpref) +{ + ElfW(Addr) low, high; + struct link_map *l; + Lmid_t nsid; + + /* If the object has a preference, load it there! */ + if (mapstartpref != 0) + return mapstartpref; + + /* Otherwise, quickly look for a suitable gap between 0x3FFFF and + 0x70000000. 0x3FFFF is so that references off NULL pointers will + cause a segfault, 0x70000000 is just paranoia (it should always + be superseded by the program's load address). 
*/ + low = 0x0003FFFF; + high = 0x70000000; + for (nsid = 0; nsid < DL_NNS; ++nsid) + for (l = GL(dl_ns)[nsid]._ns_loaded; l; l = l->l_next) + { + ElfW(Addr) mapstart, mapend; + mapstart = l->l_map_start & ~(GLRO(dl_pagesize) - 1); + mapend = l->l_map_end | (GLRO(dl_pagesize) - 1); + assert (mapend > mapstart); + + /* Prefer gaps below the main executable, note that l == + _dl_loaded does not work for static binaries loading + e.g. libnss_*.so. */ + if ((mapend >= high || l->l_type == lt_executable) + && high >= mapstart) + high = mapstart; + else if (mapend >= low && low >= mapstart) + low = mapend; + else if (high >= mapend && mapstart >= low) + { + if (high - mapend >= mapstart - low) + low = mapend; + else + high = mapstart; + } + } + + high -= 0x10000; /* Allow some room between objects. */ + maplength = (maplength | (GLRO(dl_pagesize) - 1)) + 1; + if (high <= low || high - low < maplength ) + return 0; + return high - maplength; /* Both high and maplength are page-aligned. */ +} + +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. + Also install a small trampoline to be used by entries that have + been relocated to an address too far away for a single branch. */ + +/* There are many kinds of PLT entries: + + (1) A direct jump to the actual routine, either a relative or + absolute branch. These are set up in __elf_machine_fixup_plt. + + (2) Short lazy entries. These cover the first 8192 slots in + the PLT, and look like (where 'index' goes from 0 to 8191): + + li %r11, index*4 + b &plt[PLT_TRAMPOLINE_ENTRY_WORDS+1] + + (3) Short indirect jumps. These replace (2) when a direct jump + wouldn't reach. They look the same except that the branch + is 'b &plt[PLT_LONGBRANCH_ENTRY_WORDS]'. + + (4) Long lazy entries. 
These cover the slots when a short entry + won't fit ('index*4' overflows its field), and look like: + + lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS]) + lwzu %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS]) + b &plt[PLT_TRAMPOLINE_ENTRY_WORDS] + bctr + + (5) Long indirect jumps. These replace (4) when a direct jump + wouldn't reach. They look like: + + lis %r11, %hi(index*4 + &plt[PLT_DATA_START_WORDS]) + lwz %r12, %r11, %lo(index*4 + &plt[PLT_DATA_START_WORDS]) + mtctr %r12 + bctr + + (6) Long direct jumps. These are used when thread-safety is not + required. They look like: + + lis %r12, %hi(finaladdr) + addi %r12, %r12, %lo(finaladdr) + mtctr %r12 + bctr + + + The lazy entries, (2) and (4), are set up here in + __elf_machine_runtime_setup. (1), (3), and (5) are set up in + __elf_machine_fixup_plt. (1), (3), and (6) can also be constructed + in __process_machine_rela. + + The reason for the somewhat strange construction of the long + entries, (4) and (5), is that we need to ensure thread-safety. For + (1) and (3), this is obvious because only one instruction is + changed and the PPC architecture guarantees that aligned stores are + atomic. For (5), this is more tricky. When changing (4) to (5), + the `b' instruction is first changed to `mtctr'; this is safe + and is why the `lwzu' instruction is not just a simple `addi'. + Once this is done, and is visible to all processors, the `lwzu' can + safely be changed to a `lwz'. 
*/ +int +__elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + if (map->l_info[DT_JMPREL]) + { + Elf32_Word i; + Elf32_Word *plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]); + Elf32_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf32_Rela)); + Elf32_Word rel_offset_words = PLT_DATA_START_WORDS (num_plt_entries); + Elf32_Word data_words = (Elf32_Word) (plt + rel_offset_words); + Elf32_Word size_modified; + + extern void _dl_runtime_resolve (void); + extern void _dl_prof_resolve (void); + + /* Convert the index in r11 into an actual address, and get the + word at that address. */ + plt[PLT_LONGBRANCH_ENTRY_WORDS] = OPCODE_ADDIS_HI (11, 11, data_words); + plt[PLT_LONGBRANCH_ENTRY_WORDS + 1] = OPCODE_LWZ (11, data_words, 11); + + /* Call the procedure at that address. */ + plt[PLT_LONGBRANCH_ENTRY_WORDS + 2] = OPCODE_MTCTR (11); + plt[PLT_LONGBRANCH_ENTRY_WORDS + 3] = OPCODE_BCTR (); + + if (lazy) + { + Elf32_Word *tramp = plt + PLT_TRAMPOLINE_ENTRY_WORDS; + Elf32_Word dlrr; + Elf32_Word offset; + +#ifndef PROF + dlrr = (Elf32_Word) (profile + ? _dl_prof_resolve + : _dl_runtime_resolve); + if (profile && GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), map)) + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; +#else + dlrr = (Elf32_Word) _dl_runtime_resolve; +#endif + + /* For the long entries, subtract off data_words. */ + tramp[0] = OPCODE_ADDIS_HI (11, 11, -data_words); + tramp[1] = OPCODE_ADDI (11, 11, -data_words); + + /* Multiply index of entry by 3 (in r11). */ + tramp[2] = OPCODE_SLWI (12, 11, 1); + tramp[3] = OPCODE_ADD (11, 12, 11); + if (dlrr <= 0x01fffffc || dlrr >= 0xfe000000) + { + /* Load address of link map in r12. */ + tramp[4] = OPCODE_LI (12, (Elf32_Word) map); + tramp[5] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map); + + /* Call _dl_runtime_resolve. 
*/ + tramp[6] = OPCODE_BA (dlrr); + } + else + { + /* Get address of _dl_runtime_resolve in CTR. */ + tramp[4] = OPCODE_LI (12, dlrr); + tramp[5] = OPCODE_ADDIS_HI (12, 12, dlrr); + tramp[6] = OPCODE_MTCTR (12); + + /* Load address of link map in r12. */ + tramp[7] = OPCODE_LI (12, (Elf32_Word) map); + tramp[8] = OPCODE_ADDIS_HI (12, 12, (Elf32_Word) map); + + /* Call _dl_runtime_resolve. */ + tramp[9] = OPCODE_BCTR (); + } + + /* Set up the lazy PLT entries. */ + offset = PLT_INITIAL_ENTRY_WORDS; + i = 0; + while (i < num_plt_entries && i < PLT_DOUBLE_SIZE) + { + plt[offset ] = OPCODE_LI (11, i * 4); + plt[offset+1] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS + 2 + - (offset+1)) + * 4); + i++; + offset += 2; + } + while (i < num_plt_entries) + { + plt[offset ] = OPCODE_LIS_HI (11, i * 4 + data_words); + plt[offset+1] = OPCODE_LWZU (12, i * 4 + data_words, 11); + plt[offset+2] = OPCODE_B ((PLT_TRAMPOLINE_ENTRY_WORDS + - (offset+2)) + * 4); + plt[offset+3] = OPCODE_BCTR (); + i++; + offset += 4; + } + } + + /* Now, we've modified code. We need to write the changes from + the data cache to a second-level unified cache, then make + sure that stale data in the instruction cache is removed. + (In a multiprocessor system, the effect is more complex.) + Most of the PLT shouldn't be in the instruction cache, but + there may be a little overlap at the start and the end. + + Assumes that dcbst and icbi apply to lines of 16 bytes or + more. Current known line sizes are 16, 32, and 128 bytes. + The following gets the __cache_line_size, when available. */ + + /* Default minimum 4 words per cache line. */ + int line_size_words = 4; + + if (lazy && __cache_line_size != 0) + /* Convert bytes to words. */ + line_size_words = __cache_line_size / 4; + + size_modified = lazy ? 
rel_offset_words : 6; + for (i = 0; i < size_modified; i += line_size_words) + PPC_DCBST (plt + i); + PPC_DCBST (plt + size_modified - 1); + PPC_SYNC; + + for (i = 0; i < size_modified; i += line_size_words) + PPC_ICBI (plt + i); + PPC_ICBI (plt + size_modified - 1); + PPC_ISYNC; + } + + return lazy; +} + +Elf32_Addr +__elf_machine_fixup_plt (struct link_map *map, + Elf32_Addr *reloc_addr, Elf32_Addr finaladdr) +{ + Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr; + if (delta << 6 >> 6 == delta) + *reloc_addr = OPCODE_B (delta); + else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000) + *reloc_addr = OPCODE_BA (finaladdr); + else + { + Elf32_Word *plt, *data_words; + Elf32_Word index, offset, num_plt_entries; + + num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof(Elf32_Rela)); + plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]); + offset = reloc_addr - plt; + index = (offset - PLT_INITIAL_ENTRY_WORDS)/2; + data_words = plt + PLT_DATA_START_WORDS (num_plt_entries); + + reloc_addr += 1; + + if (index < PLT_DOUBLE_SIZE) + { + data_words[index] = finaladdr; + PPC_SYNC; + *reloc_addr = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS - (offset+1)) + * 4); + } + else + { + index -= (index - PLT_DOUBLE_SIZE)/2; + + data_words[index] = finaladdr; + PPC_SYNC; + + reloc_addr[1] = OPCODE_MTCTR (12); + MODIFIED_CODE_NOQUEUE (reloc_addr + 1); + PPC_SYNC; + + reloc_addr[0] = OPCODE_LWZ (12, + (Elf32_Word) (data_words + index), 11); + } + } + MODIFIED_CODE (reloc_addr); + return finaladdr; +} + +void +_dl_reloc_overflow (struct link_map *map, + const char *name, + Elf32_Addr *const reloc_addr, + const Elf32_Sym *refsym) +{ + char buffer[128]; + char *t; + t = stpcpy (buffer, name); + t = stpcpy (t, " relocation at 0x00000000"); + _itoa_word ((unsigned) reloc_addr, t, 16, 0); + if (refsym) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + t = stpcpy (t, " for symbol `"); + t = stpcpy (t, strtab + refsym->st_name); + t = 
stpcpy (t, "'"); + } + t = stpcpy (t, " out of range"); + _dl_signal_error (0, map->l_name, NULL, buffer); +} + +void +__process_machine_rela (struct link_map *map, + const Elf32_Rela *reloc, + struct link_map *sym_map, + const Elf32_Sym *sym, + const Elf32_Sym *refsym, + Elf32_Addr *const reloc_addr, + Elf32_Addr const finaladdr, + int rinfo) +{ + union unaligned + { + uint16_t u2; + uint32_t u4; + } __attribute__((__packed__)); + + switch (rinfo) + { + case R_PPC_NONE: + return; + + case R_PPC_ADDR32: + case R_PPC_GLOB_DAT: + case R_PPC_RELATIVE: + *reloc_addr = finaladdr; + return; + + case R_PPC_IRELATIVE: + *reloc_addr = ((Elf32_Addr (*) (void)) finaladdr) (); + return; + + case R_PPC_UADDR32: + ((union unaligned *) reloc_addr)->u4 = finaladdr; + break; + + case R_PPC_ADDR24: + if (__glibc_unlikely (finaladdr > 0x01fffffc && finaladdr < 0xfe000000)) + _dl_reloc_overflow (map, "R_PPC_ADDR24", reloc_addr, refsym); + *reloc_addr = (*reloc_addr & 0xfc000003) | (finaladdr & 0x3fffffc); + break; + + case R_PPC_ADDR16: + if (__glibc_unlikely (finaladdr > 0x7fff && finaladdr < 0xffff8000)) + _dl_reloc_overflow (map, "R_PPC_ADDR16", reloc_addr, refsym); + *(Elf32_Half*) reloc_addr = finaladdr; + break; + + case R_PPC_UADDR16: + if (__glibc_unlikely (finaladdr > 0x7fff && finaladdr < 0xffff8000)) + _dl_reloc_overflow (map, "R_PPC_UADDR16", reloc_addr, refsym); + ((union unaligned *) reloc_addr)->u2 = finaladdr; + break; + + case R_PPC_ADDR16_LO: + *(Elf32_Half*) reloc_addr = finaladdr; + break; + + case R_PPC_ADDR16_HI: + *(Elf32_Half*) reloc_addr = finaladdr >> 16; + break; + + case R_PPC_ADDR16_HA: + *(Elf32_Half*) reloc_addr = (finaladdr + 0x8000) >> 16; + break; + + case R_PPC_ADDR14: + case R_PPC_ADDR14_BRTAKEN: + case R_PPC_ADDR14_BRNTAKEN: + if (__glibc_unlikely (finaladdr > 0x7fff && finaladdr < 0xffff8000)) + _dl_reloc_overflow (map, "R_PPC_ADDR14", reloc_addr, refsym); + *reloc_addr = (*reloc_addr & 0xffff0003) | (finaladdr & 0xfffc); + if (rinfo != 
R_PPC_ADDR14) + *reloc_addr = ((*reloc_addr & 0xffdfffff) + | ((rinfo == R_PPC_ADDR14_BRTAKEN) + ^ (finaladdr >> 31)) << 21); + break; + + case R_PPC_REL24: + { + Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr; + if (delta << 6 >> 6 != delta) + _dl_reloc_overflow (map, "R_PPC_REL24", reloc_addr, refsym); + *reloc_addr = (*reloc_addr & 0xfc000003) | (delta & 0x3fffffc); + } + break; + + case R_PPC_COPY: + if (sym == NULL) + /* This can happen in trace mode when an object could not be + found. */ + return; + if (sym->st_size > refsym->st_size + || (GLRO(dl_verbose) && sym->st_size < refsym->st_size)) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("\ +%s: Symbol `%s' has different size in shared object, consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr, (char *) finaladdr, MIN (sym->st_size, + refsym->st_size)); + return; + + case R_PPC_REL32: + *reloc_addr = finaladdr - (Elf32_Word) reloc_addr; + return; + + case R_PPC_JMP_SLOT: + /* It used to be that elf_machine_fixup_plt was used here, + but that doesn't work when ld.so relocates itself + for the second time. On the bright side, there's + no need to worry about thread-safety here. 
*/ + { + Elf32_Sword delta = finaladdr - (Elf32_Word) reloc_addr; + if (delta << 6 >> 6 == delta) + *reloc_addr = OPCODE_B (delta); + else if (finaladdr <= 0x01fffffc || finaladdr >= 0xfe000000) + *reloc_addr = OPCODE_BA (finaladdr); + else + { + Elf32_Word *plt, *data_words; + Elf32_Word index, offset, num_plt_entries; + + plt = (Elf32_Word *) D_PTR (map, l_info[DT_PLTGOT]); + offset = reloc_addr - plt; + + if (offset < PLT_DOUBLE_SIZE*2 + PLT_INITIAL_ENTRY_WORDS) + { + index = (offset - PLT_INITIAL_ENTRY_WORDS)/2; + num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof(Elf32_Rela)); + data_words = plt + PLT_DATA_START_WORDS (num_plt_entries); + data_words[index] = finaladdr; + reloc_addr[0] = OPCODE_LI (11, index * 4); + reloc_addr[1] = OPCODE_B ((PLT_LONGBRANCH_ENTRY_WORDS + - (offset+1)) + * 4); + MODIFIED_CODE_NOQUEUE (reloc_addr + 1); + } + else + { + reloc_addr[0] = OPCODE_LIS_HI (12, finaladdr); + reloc_addr[1] = OPCODE_ADDI (12, 12, finaladdr); + reloc_addr[2] = OPCODE_MTCTR (12); + reloc_addr[3] = OPCODE_BCTR (); + MODIFIED_CODE_NOQUEUE (reloc_addr + 3); + } + } + } + break; + +#define DO_TLS_RELOC(suffix) \ + case R_PPC_DTPREL##suffix: \ + /* During relocation all TLS symbols are defined and used. \ + Therefore the offset is already correct. 
*/ \ + if (sym_map != NULL) \ + do_reloc##suffix ("R_PPC_DTPREL"#suffix, \ + TLS_DTPREL_VALUE (sym, reloc)); \ + break; \ + case R_PPC_TPREL##suffix: \ + if (sym_map != NULL) \ + { \ + CHECK_STATIC_TLS (map, sym_map); \ + do_reloc##suffix ("R_PPC_TPREL"#suffix, \ + TLS_TPREL_VALUE (sym_map, sym, reloc)); \ + } \ + break; + + inline void do_reloc16 (const char *r_name, Elf32_Addr value) + { + if (__glibc_unlikely (value > 0x7fff && value < 0xffff8000)) + _dl_reloc_overflow (map, r_name, reloc_addr, refsym); + *(Elf32_Half *) reloc_addr = value; + } + inline void do_reloc16_LO (const char *r_name, Elf32_Addr value) + { + *(Elf32_Half *) reloc_addr = value; + } + inline void do_reloc16_HI (const char *r_name, Elf32_Addr value) + { + *(Elf32_Half *) reloc_addr = value >> 16; + } + inline void do_reloc16_HA (const char *r_name, Elf32_Addr value) + { + *(Elf32_Half *) reloc_addr = (value + 0x8000) >> 16; + } + DO_TLS_RELOC (16) + DO_TLS_RELOC (16_LO) + DO_TLS_RELOC (16_HI) + DO_TLS_RELOC (16_HA) + + default: + _dl_reloc_bad_type (map, rinfo, 0); + return; + } + + MODIFIED_CODE_NOQUEUE (reloc_addr); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.h new file mode 100644 index 0000000000..28eb50f92d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-machine.h @@ -0,0 +1,455 @@ +/* Machine-dependent ELF dynamic relocation inline functions. PowerPC version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "powerpc" + +#include <assert.h> +#include <dl-tls.h> +#include <dl-irel.h> +#include <hwcapinfo.h> + +/* Translate a processor specific dynamic tag to the index + in l_info array. */ +#define DT_PPC(x) (DT_PPC_##x - DT_LOPROC + DT_NUM) + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf32_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_PPC; +} + +/* Return the value of the GOT pointer. */ +static inline Elf32_Addr * __attribute__ ((const)) +ppc_got (void) +{ + Elf32_Addr *got; + + asm ("bcl 20,31,1f\n" + "1: mflr %0\n" + " addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n" + " addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n" + : "=b" (got) : : "lr"); + + return got; +} + +/* Return the link-time address of _DYNAMIC, stored as + the first value in the GOT. */ +static inline Elf32_Addr __attribute__ ((const)) +elf_machine_dynamic (void) +{ + return *ppc_got (); +} + +/* Return the run-time load address of the shared object. */ +static inline Elf32_Addr __attribute__ ((const)) +elf_machine_load_address (void) +{ + Elf32_Addr *branchaddr; + Elf32_Addr runtime_dynamic; + + /* This is much harder than you'd expect. Possibly I'm missing something. + The 'obvious' way: + + Apparently, "bcl 20,31,$+4" is what should be used to load LR + with the address of the next instruction. + I think this is so that machines that do bl/blr pairing don't + get confused. 
+ + asm ("bcl 20,31,0f ;" + "0: mflr 0 ;" + "lis %0,0b@ha;" + "addi %0,%0,0b@l;" + "subf %0,%0,0" + : "=b" (addr) : : "r0", "lr"); + + doesn't work, because the linker doesn't have to (and in fact doesn't) + update the @ha and @l references; the loader (which runs after this + code) will do that. + + Instead, we use the following trick: + + The linker puts the _link-time_ address of _DYNAMIC at the first + word in the GOT. We could branch to that address, if we wanted, + by using an @local reloc; the linker works this out, so it's safe + to use now. We can't, of course, actually branch there, because + we'd cause an illegal instruction exception; so we need to compute + the address ourselves. That gives us the following code: */ + + /* Get address of the 'b _DYNAMIC@local'... */ + asm ("bcl 20,31,0f;" + "b _DYNAMIC@local;" + "0:" + : "=l" (branchaddr)); + + /* So now work out the difference between where the branch actually points, + and the offset of that location in memory from the start of the file. */ + runtime_dynamic = ((Elf32_Addr) branchaddr + + ((Elf32_Sword) (*branchaddr << 6 & 0xffffff00) >> 6)); + + return runtime_dynamic - elf_machine_dynamic (); +} + +#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ + +/* The PLT uses Elf32_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + +/* Mask identifying addresses reserved for the user program, + where the dynamic linker should not map anything. */ +#define ELF_MACHINE_USER_ADDRESS_MASK 0xf0000000UL + +/* The actual _start code is in dl-start.S. Use a really + ugly bit of assembler to let dl-start.o see _dl_start. */ +#define RTLD_START asm (".globl _dl_start"); + +/* Decide where a relocatable object should be loaded. 
*/ +extern ElfW(Addr) +__elf_preferred_address(struct link_map *loader, size_t maplength, + ElfW(Addr) mapstartpref); +#define ELF_PREFERRED_ADDRESS(loader, maplength, mapstartpref) \ + __elf_preferred_address (loader, maplength, mapstartpref) + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so + PLT entries should not be allowed to define the value. + ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one + of the main executable's symbols, as for a COPY reloc. */ +/* We never want to use a PLT entry as the destination of a + reloc, when what is being relocated is a branch. This is + partly for efficiency, but mostly so we avoid loops. */ +#define elf_machine_type_class(type) \ + ((((type) == R_PPC_JMP_SLOT \ + || (type) == R_PPC_REL24 \ + || ((type) >= R_PPC_DTPMOD32 /* contiguous TLS */ \ + && (type) <= R_PPC_DTPREL32) \ + || (type) == R_PPC_ADDR24) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_PPC_COPY) * ELF_RTYPE_CLASS_COPY)) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_PPC_JMP_SLOT + +/* The PowerPC never uses REL relocations. */ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +#ifdef SHARED +# define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + __tcb_parse_hwcap_and_convert_at_platform (); +} +#endif + +/* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. + Also install a small trampoline to be used by entries that have + been relocated to an address too far away for a single branch. 
*/ +extern int __elf_machine_runtime_setup (struct link_map *map, + int lazy, int profile); + +static inline int +elf_machine_runtime_setup (struct link_map *map, + int lazy, int profile) +{ + if (map->l_info[DT_JMPREL] == 0) + return lazy; + + if (map->l_info[DT_PPC(GOT)] == 0) + /* Handle old style PLT. */ + return __elf_machine_runtime_setup (map, lazy, profile); + + /* New style non-exec PLT consisting of an array of addresses. */ + map->l_info[DT_PPC(GOT)]->d_un.d_ptr += map->l_addr; + if (lazy) + { + Elf32_Addr *plt, *got, glink; + Elf32_Word num_plt_entries; + void (*dlrr) (void); + extern void _dl_runtime_resolve (void); + extern void _dl_prof_resolve (void); + + if (__glibc_likely (!profile)) + dlrr = _dl_runtime_resolve; + else + { + if (GLRO(dl_profile) != NULL + &&_dl_name_match_p (GLRO(dl_profile), map)) + GL(dl_profile_map) = map; + dlrr = _dl_prof_resolve; + } + got = (Elf32_Addr *) map->l_info[DT_PPC(GOT)]->d_un.d_ptr; + glink = got[1]; + got[1] = (Elf32_Addr) dlrr; + got[2] = (Elf32_Addr) map; + + /* Relocate everything in .plt by the load address offset. */ + plt = (Elf32_Addr *) D_PTR (map, l_info[DT_PLTGOT]); + num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf32_Rela)); + + /* If a library is prelinked but we have to relocate anyway, + we have to be able to undo the prelinking of .plt section. + The prelinker saved us at got[1] address of .glink + section's start. */ + if (glink) + { + glink += map->l_addr; + while (num_plt_entries-- != 0) + *plt++ = glink, glink += 4; + } + else + while (num_plt_entries-- != 0) + *plt++ += map->l_addr; + } + return lazy; +} + +/* Change the PLT entry whose reloc is 'reloc' to call the actual routine. 
*/ +extern Elf32_Addr __elf_machine_fixup_plt (struct link_map *map, + Elf32_Addr *reloc_addr, + Elf32_Addr finaladdr); + +static inline Elf32_Addr +elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const Elf32_Rela *reloc, + Elf32_Addr *reloc_addr, Elf64_Addr finaladdr) +{ + if (map->l_info[DT_PPC(GOT)] == 0) + /* Handle old style PLT. */ + return __elf_machine_fixup_plt (map, reloc_addr, finaladdr); + + *reloc_addr = finaladdr; + return finaladdr; +} + +/* Return the final value of a plt relocation. */ +static inline Elf32_Addr +elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + Elf32_Addr value) +{ + return value + reloc->r_addend; +} + + +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER ppc32_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc32_gnu_pltexit + +#endif /* dl_machine_h */ + +#ifdef RESOLVE_MAP + +/* Do the actual processing of a reloc, once its target address + has been determined. */ +extern void __process_machine_rela (struct link_map *map, + const Elf32_Rela *reloc, + struct link_map *sym_map, + const Elf32_Sym *sym, + const Elf32_Sym *refsym, + Elf32_Addr *const reloc_addr, + Elf32_Addr finaladdr, + int rinfo) attribute_hidden; + +/* Call _dl_signal_error when a resolved value overflows a relocated area. */ +extern void _dl_reloc_overflow (struct link_map *map, + const char *name, + Elf32_Addr *const reloc_addr, + const Elf32_Sym *refsym) attribute_hidden; + +/* Perform the relocation specified by RELOC and SYM (which is fully resolved). + LOADADDR is the load address of the object; INFO is an array indexed + by DT_* of the .dynamic section info. 
*/ + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + const Elf32_Sym *sym, const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) +{ + Elf32_Addr *const reloc_addr = reloc_addr_arg; + const Elf32_Sym *const refsym = sym; + Elf32_Addr value; + const int r_type = ELF32_R_TYPE (reloc->r_info); + struct link_map *sym_map = NULL; + +#ifndef RESOLVE_CONFLICT_FIND_MAP + if (r_type == R_PPC_RELATIVE) + { + *reloc_addr = map->l_addr + reloc->r_addend; + return; + } + + if (__glibc_unlikely (r_type == R_PPC_NONE)) + return; + + /* binutils on ppc32 includes st_value in r_addend for relocations + against local symbols. */ + if (__builtin_expect (ELF32_ST_BIND (sym->st_info) == STB_LOCAL, 0) + && sym->st_shndx != SHN_UNDEF) + value = map->l_addr; + else + { + sym_map = RESOLVE_MAP (&sym, version, r_type); + value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value; + } + value += reloc->r_addend; +#else + value = reloc->r_addend; +#endif + + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) + value = elf_ifunc_invoke (value); + + /* A small amount of code is duplicated here for speed. In libc, + more than 90% of the relocs are R_PPC_RELATIVE; in the X11 shared + libraries, 60% are R_PPC_RELATIVE, 24% are R_PPC_GLOB_DAT or + R_PPC_ADDR32, and 16% are R_PPC_JMP_SLOT (which this routine + wouldn't usually handle). As an bonus, doing this here allows + the switch statement in __process_machine_rela to work. 
*/ + switch (r_type) + { + case R_PPC_GLOB_DAT: + case R_PPC_ADDR32: + *reloc_addr = value; + break; + +#ifndef RESOLVE_CONFLICT_FIND_MAP +# ifdef RTLD_BOOTSTRAP +# define NOT_BOOTSTRAP 0 +# else +# define NOT_BOOTSTRAP 1 +# endif + + case R_PPC_DTPMOD32: + if (map->l_info[DT_PPC(OPT)] + && (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS)) + { + if (!NOT_BOOTSTRAP) + { + reloc_addr[0] = 0; + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + break; + } + else if (sym_map != NULL) + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (TRY_STATIC_TLS (map, sym_map)) +# endif + { + reloc_addr[0] = 0; + /* Set up for local dynamic. */ + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + break; + } + } + } + if (!NOT_BOOTSTRAP) + /* During startup the dynamic linker is always index 1. */ + *reloc_addr = 1; + else if (sym_map != NULL) + /* Get the information from the link map returned by the + RESOLVE_MAP function. */ + *reloc_addr = sym_map->l_tls_modid; + break; + case R_PPC_DTPREL32: + if (map->l_info[DT_PPC(OPT)] + && (map->l_info[DT_PPC(OPT)]->d_un.d_val & PPC_OPT_TLS)) + { + if (!NOT_BOOTSTRAP) + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + break; + } + else if (sym_map != NULL) + { + /* This reloc is always preceded by R_PPC_DTPMOD32. */ +# ifndef SHARED + assert (HAVE_STATIC_TLS (map, sym_map)); +# else + if (HAVE_STATIC_TLS (map, sym_map)) +# endif + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + break; + } + } + } + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. 
*/ + if (NOT_BOOTSTRAP && sym_map != NULL) + *reloc_addr = TLS_DTPREL_VALUE (sym, reloc); + break; + case R_PPC_TPREL32: + if (!NOT_BOOTSTRAP || sym_map != NULL) + { + if (NOT_BOOTSTRAP) + CHECK_STATIC_TLS (map, sym_map); + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + } + break; +#endif + + case R_PPC_JMP_SLOT: +#ifdef RESOLVE_CONFLICT_FIND_MAP + RESOLVE_CONFLICT_FIND_MAP (map, reloc_addr); +#endif + if (map->l_info[DT_PPC(GOT)] != 0) + { + *reloc_addr = value; + break; + } + /* FALLTHROUGH */ + + default: + __process_machine_rela (map, reloc, sym_map, sym, refsym, + reloc_addr, value, r_type); + } +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +{ + Elf32_Addr *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) +{ + /* elf_machine_runtime_setup handles this. */ +} + +#endif /* RESOLVE_MAP */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-start.S b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-start.S new file mode 100644 index 0000000000..ab429567aa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-start.S @@ -0,0 +1,103 @@ +/* Machine-dependent ELF startup code. PowerPC version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Initial entry point code for the dynamic linker. + The C function `_dl_start' is the real entry point; + its return value is the user program's entry point. */ +ENTRY(_start) +/* We start with the following on the stack, from top: + argc (4 bytes); + arguments for program (terminated by NULL); + environment variables (terminated by NULL); + arguments for the program loader. */ + +/* Call _dl_start with one parameter pointing at argc */ + mr r3,r1 +/* (we have to frob the stack pointer a bit to allow room for + _dl_start to save the link register). */ + li r4,0 + addi r1,r1,-16 + stw r4,0(r1) + bl _dl_start@local + + /* FALLTHRU */ +_dl_start_user: +/* Now, we do our main work of calling initialisation procedures. + The ELF ABI doesn't say anything about parameters for these, + so we just pass argc, argv, and the environment. + Changing these is strongly discouraged (not least because argc is + passed by value!). */ + +/* Put our GOT pointer in r31, */ + SETUP_GOT_ACCESS(r31,got_label) + addis r31,r31,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r31,r31,_GLOBAL_OFFSET_TABLE_-got_label@l +/* the address of _start in r30, */ + mr r30,r3 +/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ + lwz r28,_rtld_local@got(r31) + lwz r29,_dl_argc@got(r31) + lwz r27,__GI__dl_argv@got(r31) + +/* Call _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). 
*/ + lwz r3,0(r28) + lwz r4,0(r29) + lwz r5,0(r27) + slwi r6,r4,2 + add r6,r5,r6 + addi r6,r6,4 + bl _dl_init@local + +/* Now, to conform to the ELF ABI, we have to: */ +/* Pass argc (actually _dl_argc) in r3; */ + lwz r3,0(r29) +/* pass argv (actually _dl_argv) in r4; */ + lwz r4,0(r27) +/* pass envp (actually _dl_argv+_dl_argc+1) in r5; */ + slwi r5,r3,2 + add r6,r4,r5 + addi r5,r6,4 +/* pass the auxiliary vector in r6. This is passed to us just after _envp. */ +2: lwzu r0,4(r6) + cmpwi r0,0 + bne 2b + addi r6,r6,4 +/* Pass a termination function pointer (in this case _dl_fini) in r7. */ + lwz r7,_dl_fini@got(r31) +/* Now, call the start function in r30... */ + mtctr r30 +/* Pass the stack pointer in r1 (so far so good), pointing to a NULL value. + (This lets our startup code distinguish between a program linked statically, + which linux will call with argc on top of the stack which will hopefully + never be zero, and a dynamically linked program which will always have + a NULL on the top of the stack). + Take the opportunity to clear LR, so anyone who accidentally returns + from _start gets SEGV. Also clear the next few words of the stack. */ + +_dl_main_dispatch: + li r31,0 + stw r31,0(r1) + mtlr r31 + stw r31,4(r1) + stw r31,8(r1) + stw r31,12(r1) +/* Go do it! */ + bctr +END(_start) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/dl-trampoline.S b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-trampoline.S new file mode 100644 index 0000000000..16b12db0e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/dl-trampoline.S @@ -0,0 +1,189 @@ +/* PLT trampolines. PPC32 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".text" + .align 2 + .globl _dl_runtime_resolve + .type _dl_runtime_resolve,@function +_dl_runtime_resolve: + cfi_startproc + # We need to save the registers used to pass parameters, and register 0, + # which is used by _mcount; the registers are saved in a stack frame. + stwu r1,-64(r1) + cfi_adjust_cfa_offset (64) + stw r0,12(r1) + stw r3,16(r1) + stw r4,20(r1) + # The code that calls this has put parameters for `fixup' in r12 and r11. + mr r3,r12 + stw r5,24(r1) + mr r4,r11 + stw r6,28(r1) + mflr r0 + # We also need to save some of the condition register fields + stw r7,32(r1) + # Don't clobber the caller's LRSAVE, it is needed by _mcount. + stw r0,48(r1) + cfi_offset (lr, -16) + stw r8,36(r1) + mfcr r0 + stw r9,40(r1) + stw r10,44(r1) + stw r0,8(r1) + bl _dl_fixup@local + # 'fixup' returns the address we want to branch to. + mtctr r3 + # Put the registers back... + lwz r0,48(r1) + lwz r10,44(r1) + lwz r9,40(r1) + mtlr r0 + lwz r8,36(r1) + lwz r0,8(r1) + lwz r7,32(r1) + lwz r6,28(r1) + mtcrf 0xFF,r0 + lwz r5,24(r1) + lwz r4,20(r1) + lwz r3,16(r1) + lwz r0,12(r1) + # ...unwind the stack frame, and jump to the PLT entry we updated. + addi r1,r1,64 + bctr + cfi_endproc + .size _dl_runtime_resolve,.-_dl_runtime_resolve + +#ifndef PROF + .align 2 + .globl _dl_prof_resolve + .type _dl_prof_resolve,@function +_dl_prof_resolve: + cfi_startproc + # We need to save the registers used to pass parameters, and register 0, + # which is used by _mcount; the registers are saved in a stack frame. 
+ stwu r1,-320(r1) + cfi_adjust_cfa_offset (320) + /* Stack layout: + + +312 stackframe + +308 lr + +304 r1 + +288 v12 + +272 v11 + +256 v10 + +240 v9 + +224 v8 + +208 v7 + +192 v6 + +176 v5 + +160 v4 + +144 v3 + +128 v2 + +112 v1 + +104 fp8 + +96 fp7 + +88 fp6 + +80 fp5 + +72 fp4 + +64 fp3 + +56 fp2 + +48 fp1 + +44 r10 + +40 r9 + +36 r8 + +32 r7 + +28 r6 + +24 r5 + +20 r4 + +16 r3 + +12 r0 + +8 cr + r1 link + */ + stw r0,12(r1) + stw r3,16(r1) + stw r4,20(r1) + # The code that calls this has put parameters for `fixup' in r12 and r11. + mr r3,r12 + stw r5,24(r1) + mr r4,r11 + stw r6,28(r1) + mflr r5 + # We also need to save some of the condition register fields. + stw r7,32(r1) + # Don't clobber the caller's LRSAVE, it is needed by _mcount. + stw r5,308(r1) + cfi_offset (lr, -12) + stw r8,36(r1) + mfcr r0 + stw r9,40(r1) + stw r10,44(r1) + stw r0,8(r1) +#ifndef __NO_FPRS__ + # Save the floating point registers + stfd fp1,48(r1) + stfd fp2,56(r1) + stfd fp3,64(r1) + stfd fp4,72(r1) + stfd fp5,80(r1) + stfd fp6,88(r1) + stfd fp7,96(r1) + stfd fp8,104(r1) +#endif + # XXX TODO: store vmx registers + # Load the extra parameters. + addi r6,r1,16 + addi r7,r1,312 + li r0,-1 + stw r0,0(r7) + bl _dl_profile_fixup@local + # 'fixup' returns the address we want to branch to. + mtctr r3 + # Put the registers back... + lwz r0,308(r1) + lwz r10,44(r1) + lwz r9,40(r1) + mtlr r0 + lwz r8,36(r1) + lwz r0,8(r1) + lwz r7,32(r1) + lwz r6,28(r1) + mtcrf 0xFF,r0 + lwz r5,24(r1) + lwz r4,20(r1) + lwz r3,16(r1) + lwz r0,12(r1) +#ifndef __NO_FPRS__ + # Load the floating point registers. + lfd fp1,48(r1) + lfd fp2,56(r1) + lfd fp3,64(r1) + lfd fp4,72(r1) + lfd fp5,80(r1) + lfd fp6,88(r1) + lfd fp7,96(r1) + lfd fp8,104(r1) +#endif + # ...unwind the stack frame, and jump to the PLT entry we updated. 
+ addi r1,r1,320 + bctr + cfi_endproc + .size _dl_prof_resolve,.-_dl_prof_resolve +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile new file mode 100644 index 0000000000..adf556870a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile @@ -0,0 +1,9 @@ +ifeq ($(subdir),math) +libm-routines += fexcepts_to_spe fexcepts_from_spe +libm-routines += fexcepts_to_prctl fexcepts_from_prctl +libm-routines += fe_note_change +endif + +ifeq ($(subdir),soft-fp) +sysdep_routines += fraiseexcept-soft +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feclearexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feclearexcept.c new file mode 100644 index 0000000000..09132451a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feclearexcept.c @@ -0,0 +1,50 @@ +/* Clear floating-point exceptions for atomic compound assignment. + e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <stdlib.h> +#include <sysdep.h> +#include <sys/prctl.h> + +void +__atomic_feclearexcept (void) +{ + unsigned int fpescr, old_fpescr; + + /* Get the current state. 
*/ + old_fpescr = fpescr = fegetenv_register (); + + /* Clear the relevant bits. */ + fpescr &= ~SPEFSCR_ALL_EXCEPT; + + /* Put the new state in effect. */ + fesetenv_register (fpescr); + + /* Let the kernel know if the "invalid" or "underflow" bit was + cleared. */ + if (old_fpescr & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) + { + int pflags __attribute__ ((__unused__)), r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feholdexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feholdexcept.c new file mode 100644 index 0000000000..3d6e10f1b6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feholdexcept.c @@ -0,0 +1,55 @@ +/* Store current floating-point environment and clear exceptions for + atomic compound assignment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <stdlib.h> +#include <sysdep.h> +#include <sys/prctl.h> + +void +__atomic_feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + /* Get the current state. 
*/ + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); + + u.l[1] = fegetenv_register (); + *envp = u.fenv; + + /* Clear everything except for the rounding mode and trapping to the + kernel. */ + u.l[0] &= ~(PR_FP_EXC_DIV + | PR_FP_EXC_OVF + | PR_FP_EXC_UND + | PR_FP_EXC_RES + | PR_FP_EXC_INV); + u.l[1] &= SPEFSCR_FRMC | (SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE); + + /* Put the new state in effect. */ + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feupdateenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feupdateenv.c new file mode 100644 index 0000000000..a4615a1b01 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/atomic-feupdateenv.c @@ -0,0 +1,46 @@ +/* Install given floating-point environment and raise exceptions for + atomic compound assignment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> +#include <stdlib.h> +#include <sysdep.h> +#include <sys/prctl.h> + +void +__atomic_feupdateenv (const fenv_t *envp) +{ + int exc; + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + /* Save the currently set exceptions. */ + exc = fegetenv_register () & SPEFSCR_ALL_EXCEPT; + + u.fenv = *envp; + + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + abort (); + + /* Raise (if appropriate) saved exceptions. */ + __feraiseexcept_soft (exc); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c new file mode 100644 index 0000000000..cbf8d9df6c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c @@ -0,0 +1,53 @@ +/* Clear given exceptions in current floating-point environment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#undef feclearexcept +int +__feclearexcept (int excepts) +{ + unsigned int fpescr; + int excepts_spe = __fexcepts_to_spe (excepts); + + /* Get the current state. */ + fpescr = fegetenv_register (); + + /* Clear the relevant bits. 
*/ + fpescr &= ~excepts_spe; + + /* Put the new state in effect. */ + fesetenv_register (fpescr); + + /* Let the kernel know if the "invalid" or "underflow" bit was + cleared. */ + if (excepts & (FE_INVALID | FE_UNDERFLOW)) + __fe_note_change (); + + /* Success. */ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feclearexcept, __old_feclearexcept) +compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1); +#endif + +libm_hidden_ver (__feclearexcept, feclearexcept) +versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c new file mode 100644 index 0000000000..3dd3161f7d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c @@ -0,0 +1,39 @@ +/* Note a change to floating-point exceptions. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +/* Inform the kernel of a change to floating-point exceptions. 
*/ + +void +__fe_note_change (void) +{ + int pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return; + if ((pflags & PR_FP_EXC_SW_ENABLE) == 0) + INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + pflags | PR_FP_EXC_SW_ENABLE); +} + +libm_hidden_def (__fe_note_change) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c new file mode 100644 index 0000000000..94ce45463c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c @@ -0,0 +1,54 @@ +/* Disable floating-point exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +fedisableexcept (int excepts) +{ + int result = 0, pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* Save old enable bits. 
*/ + result = __fexcepts_from_prctl (pflags); + + pflags &= ~__fexcepts_to_prctl (excepts); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + pflags | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* If disabling signals for "inexact", also disable trapping to the + kernel. */ + if ((excepts & FE_INEXACT) != 0) + { + unsigned long fpescr; + + fpescr = fegetenv_register (); + fpescr &= ~SPEFSCR_FINXE; + fesetenv_register (fpescr); + } + + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c new file mode 100644 index 0000000000..32116d1608 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c @@ -0,0 +1,54 @@ +/* Enable floating-point exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +feenableexcept (int excepts) +{ + unsigned int result = 0, pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* Save old enable bits. 
*/ + result = __fexcepts_from_prctl (pflags); + + pflags |= __fexcepts_to_prctl (excepts); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + pflags | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* If enabling signals for "inexact", also enable trapping to the + kernel. */ + if ((excepts & FE_INEXACT) != 0) + { + unsigned long fpescr; + + fpescr = fegetenv_register (); + fpescr |= SPEFSCR_FINXE; + fesetenv_register (fpescr); + } + + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c new file mode 100644 index 0000000000..01b8fa4c9a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c @@ -0,0 +1,49 @@ +/* Store current floating-point environment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +__fegetenv (fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + u.l[1] = fegetenv_register (); + *envp = u.fenv; + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetenv, __old_fegetenv) +compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1); +#endif +libm_hidden_def (__fegetenv) +libm_hidden_ver (__fegetenv, fegetenv) + +versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c new file mode 100644 index 0000000000..74fdb5a1c9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c @@ -0,0 +1,36 @@ +/* Get floating-point exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +fegetexcept (void) +{ + int result = 0, pflags, r; + INTERNAL_SYSCALL_DECL (err); + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + result = __fexcepts_from_prctl (pflags); + + return result; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetmode.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetmode.c new file mode 100644 index 0000000000..d262714266 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetmode.c @@ -0,0 +1,37 @@ +/* Store current floating-point control modes. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +fegetmode (femode_t *modep) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + u.l[1] = fegetenv_register (); + *modep = u.fenv; + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c new file mode 100644 index 0000000000..afcc5d18cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c @@ -0,0 +1,31 @@ +/* Return current rounding direction. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +int +__fegetround (void) +{ + unsigned long fpescr; + + fpescr = fegetenv_register (); + return fpescr & 3; +} +libm_hidden_def (__fegetround) +weak_alias (__fegetround, fegetround) +libm_hidden_weak (fegetround) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c new file mode 100644 index 0000000000..cba1239561 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c @@ -0,0 +1,59 @@ +/* Store current floating-point environment and clear exceptions. + e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +__feholdexcept (fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + /* Get the current state. */ + r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + u.l[1] = fegetenv_register (); + *envp = u.fenv; + + /* Clear everything except for the rounding mode and trapping to the + kernel. 
*/ + u.l[0] &= ~(PR_FP_EXC_DIV + | PR_FP_EXC_OVF + | PR_FP_EXC_UND + | PR_FP_EXC_RES + | PR_FP_EXC_INV); + u.l[1] &= SPEFSCR_FRMC | (SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE); + + /* Put the new state in effect. */ + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + return 0; +} +libm_hidden_def (__feholdexcept) +weak_alias (__feholdexcept, feholdexcept) +libm_hidden_weak (feholdexcept) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c new file mode 100644 index 0000000000..9fc3f53bc7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c @@ -0,0 +1,45 @@ +/* Constant floating-point environments for e500. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The use of "unsigned long long" as the type to define the + bit-pattern explicitly, rather than the type "double" used in + <bits/fenv.h>, means that we cannot include <fenv_libc.h> here to + get the enum constants for the SPEFSCR bits to enable + exceptions. */ + +#include <sys/prctl.h> + +/* If the default argument is used we use this value. 
*/ +const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) = + 0x3cULL; + +/* The same representation is used for femode_t. */ +extern const unsigned long long __fe_dfl_mode + __attribute__ ((aligned (8), alias ("__fe_dfl_env"))); + +/* Floating-point environment where none of the exceptions are masked. */ +const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) = + (((unsigned long long) (PR_FP_EXC_DIV + | PR_FP_EXC_OVF + | PR_FP_EXC_UND + | PR_FP_EXC_RES + | PR_FP_EXC_INV)) << 32) | 0x7cULL; + +/* Non-IEEE mode. */ +const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) = + 0x0ULL; diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h new file mode 100644 index 0000000000..13437f8052 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h @@ -0,0 +1,99 @@ +/* Internal libc stuff for floating point environment routines. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _FENV_LIBC_H +#define _FENV_LIBC_H 1 + +#include <fenv.h> + +int __feraiseexcept_spe (int); +libm_hidden_proto (__feraiseexcept_spe) + +int __feraiseexcept_soft (int); +libc_hidden_proto (__feraiseexcept_soft) + +int __fexcepts_to_spe (int); +libm_hidden_proto (__fexcepts_to_spe) + +int __fexcepts_from_spe (int); +libm_hidden_proto (__fexcepts_from_spe) + +int __fexcepts_to_prctl (int); +libm_hidden_proto (__fexcepts_to_prctl) + +int __fexcepts_from_prctl (int); +libm_hidden_proto (__fexcepts_from_prctl) + +void __fe_note_change (void); +libm_hidden_proto (__fe_note_change) + +/* Equivalent to fegetenv, but returns an unsigned int instead of + taking a pointer. */ +#define fegetenv_register() \ + ({ unsigned int fscr; asm volatile ("mfspefscr %0" : "=r" (fscr)); fscr; }) + +/* Equivalent to fesetenv, but takes an unsigned int instead of a + pointer. */ +#define fesetenv_register(fscr) \ + ({ asm volatile ("mtspefscr %0" : : "r" (fscr)); }) + +typedef union +{ + fenv_t fenv; + unsigned int l[2]; +} fenv_union_t; + +/* Definitions of all the SPEFSCR bit numbers. */ +enum { + SPEFSCR_SOVH = 0x80000000, + SPEFSCR_OVH = 0x40000000, + SPEFSCR_FGH = 0x20000000, + SPEFSCR_FXH = 0x10000000, + SPEFSCR_FINVH = 0x08000000, + SPEFSCR_FDBZH = 0x04000000, + SPEFSCR_FUNFH = 0x02000000, + SPEFSCR_FOVFH = 0x01000000, + /* 2 unused bits. */ + SPEFSCR_FINXS = 0x00200000, + SPEFSCR_FINVS = 0x00100000, + SPEFSCR_FDBZS = 0x00080000, + SPEFSCR_FUNFS = 0x00040000, + SPEFSCR_FOVFS = 0x00020000, + /* Combination of the exception bits. */ + SPEFSCR_ALL_EXCEPT = 0x003e0000, + SPEFSCR_MODE = 0x00010000, + SPEFSCR_SOV = 0x00008000, + SPEFSCR_OV = 0x00004000, + SPEFSCR_FG = 0x00002000, + SPEFSCR_FX = 0x00001000, + SPEFSCR_FINV = 0x00000800, + SPEFSCR_FDBZ = 0x00000400, + SPEFSCR_FUNF = 0x00000200, + SPEFSCR_FOVF = 0x00000100, + /* 1 unused bit. 
*/ + SPEFSCR_FINXE = 0x00000040, + SPEFSCR_FINVE = 0x00000020, + SPEFSCR_FDBZE = 0x00000010, + SPEFSCR_FUNFE = 0x00000008, + SPEFSCR_FOVFE = 0x00000004, + /* Combination of the exception trap enable bits. */ + SPEFSCR_ALL_EXCEPT_ENABLE = 0x0000007c, + SPEFSCR_FRMC = 0x00000003 +}; + +#endif /* fenv_libc.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c new file mode 100644 index 0000000000..185bcdb051 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c @@ -0,0 +1,50 @@ +/* Install given floating-point environment. e500 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +int +__fesetenv (const fenv_t *envp) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + u.fenv = *envp; + + fesetenv_register (u.l[1]); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetenv, __old_fesetenv) +compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1); +#endif + +libm_hidden_def (__fesetenv) +libm_hidden_ver (__fesetenv, fesetenv) +versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetexcept.c new file mode 100644 index 0000000000..688583a6e7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetexcept.c @@ -0,0 +1,37 @@ +/* Set given exception flags. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fesetexcept (int excepts) +{ + unsigned long old_spefscr, spefscr; + int excepts_spe = __fexcepts_to_spe (excepts); + + old_spefscr = fegetenv_register (); + spefscr = old_spefscr | excepts_spe; + fesetenv_register (spefscr); + + /* If the state of the "invalid" or "underflow" flag has changed, + inform the kernel. 
*/ + if (((spefscr ^ old_spefscr) & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) != 0) + __fe_note_change (); + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetmode.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetmode.c new file mode 100644 index 0000000000..360e500b27 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetmode.c @@ -0,0 +1,43 @@ +/* Install given floating-point control modes. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> +#include <sysdep.h> +#include <sys/prctl.h> + +#define SPEFSCR_STATUS 0xff3eff00 + +int +fesetmode (const femode_t *modep) +{ + fenv_union_t u; + INTERNAL_SYSCALL_DECL (err); + int r; + + u.fenv = *modep; + unsigned int spefscr = fegetenv_register (); + spefscr = (spefscr & SPEFSCR_STATUS) | (u.l[1] & ~SPEFSCR_STATUS); + + fesetenv_register (spefscr); + r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC, + u.l[0] | PR_FP_EXC_SW_ENABLE); + if (INTERNAL_SYSCALL_ERROR_P (r, err)) + return -1; + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c new file mode 100644 index 0000000000..15aaa62079 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c @@ -0,0 +1,37 @@ +/* Set current rounding direction. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +int +__fesetround (int round) +{ + unsigned long fpescr; + + if ((unsigned int) round > 3) + return 1; + + fpescr = fegetenv_register (); + fpescr = (fpescr & ~SPEFSCR_FRMC) | (round & 3); + fesetenv_register (fpescr); + + return 0; +} +libm_hidden_def (__fesetround) +weak_alias (__fesetround, fesetround) +libm_hidden_weak (fesetround) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fetestexceptflag.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fetestexceptflag.c new file mode 100644 index 0000000000..9d42d919ec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fetestexceptflag.c @@ -0,0 +1,25 @@ +/* Test exception in saved exception state. e500 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> + +int +fetestexceptflag (const fexcept_t *flagp, int excepts) +{ + return __fexcepts_from_spe (*flagp) & excepts & FE_ALL_EXCEPT; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c new file mode 100644 index 0000000000..54de708449 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c @@ -0,0 +1,48 @@ +/* Install given floating-point environment and raise exceptions. + e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__feupdateenv (const fenv_t *envp) +{ + int exc; + + /* Save the currently set exceptions. */ + exc = fegetenv_register () & SPEFSCR_ALL_EXCEPT; + + /* Install new environment. */ + __fesetenv (envp); + + /* Raise (if appropriate) saved exceptions. */ + __feraiseexcept_spe (exc); + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feupdateenv, __old_feupdateenv) +compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1); +#endif + +libm_hidden_def (__feupdateenv) +libm_hidden_ver (__feupdateenv, feupdateenv) +versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c new file mode 100644 index 0000000000..b260fc8df1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c @@ -0,0 +1,42 @@ +/* Convert floating-point exceptions from prctl form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sys/prctl.h> + +/* Convert EXCEPTS from prctl bits to FE_* form, returning the + converted value. 
*/ + +int +__fexcepts_from_prctl (int excepts) +{ + int result = 0; + if (excepts & PR_FP_EXC_OVF) + result |= FE_OVERFLOW; + if (excepts & PR_FP_EXC_UND) + result |= FE_UNDERFLOW; + if (excepts & PR_FP_EXC_INV) + result |= FE_INVALID; + if (excepts & PR_FP_EXC_DIV) + result |= FE_DIVBYZERO; + if (excepts & PR_FP_EXC_RES) + result |= FE_INEXACT; + return result; +} + +libm_hidden_def (__fexcepts_from_prctl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c new file mode 100644 index 0000000000..a925fe4c37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c @@ -0,0 +1,41 @@ +/* Convert floating-point exceptions from SPEFSCR form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Convert EXCEPTS from SPEFSCR bits to FE_* form, returning the + converted value. 
*/ + +int +__fexcepts_from_spe (int excepts) +{ + int result = 0; + if (excepts & SPEFSCR_FINXS) + result |= FE_INEXACT; + if (excepts & SPEFSCR_FDBZS) + result |= FE_DIVBYZERO; + if (excepts & SPEFSCR_FUNFS) + result |= FE_UNDERFLOW; + if (excepts & SPEFSCR_FOVFS) + result |= FE_OVERFLOW; + if (excepts & SPEFSCR_FINVS) + result |= FE_INVALID; + return result; +} + +libm_hidden_def (__fexcepts_from_spe) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c new file mode 100644 index 0000000000..e4626312ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c @@ -0,0 +1,42 @@ +/* Convert floating-point exceptions to prctl form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <sys/prctl.h> + +/* Convert EXCEPTS from FE_* form to prctl bits, returning the + converted value. 
*/ + +int +__fexcepts_to_prctl (int excepts) +{ + int result = 0; + if (excepts & FE_INEXACT) + result |= PR_FP_EXC_RES; + if (excepts & FE_DIVBYZERO) + result |= PR_FP_EXC_DIV; + if (excepts & FE_UNDERFLOW) + result |= PR_FP_EXC_UND; + if (excepts & FE_OVERFLOW) + result |= PR_FP_EXC_OVF; + if (excepts & FE_INVALID) + result |= PR_FP_EXC_INV; + return result; +} + +libm_hidden_def (__fexcepts_to_prctl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c new file mode 100644 index 0000000000..3eed4ae6e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c @@ -0,0 +1,41 @@ +/* Convert floating-point exceptions to SPEFSCR form. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +/* Convert EXCEPTS from FE_* form to SPEFSCR bits, returning the + converted value. 
*/ + +int +__fexcepts_to_spe (int excepts) +{ + int result = 0; + if (excepts & FE_INEXACT) + result |= SPEFSCR_FINXS; + if (excepts & FE_DIVBYZERO) + result |= SPEFSCR_FDBZS; + if (excepts & FE_UNDERFLOW) + result |= SPEFSCR_FUNFS; + if (excepts & FE_OVERFLOW) + result |= SPEFSCR_FOVFS; + if (excepts & FE_INVALID) + result |= SPEFSCR_FINVS; + return result; +} + +libm_hidden_def (__fexcepts_to_spe) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c new file mode 100644 index 0000000000..cff4330a9c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c @@ -0,0 +1,41 @@ +/* Store current representation for exceptions. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fegetexceptflag (fexcept_t *flagp, int excepts) +{ + unsigned long fpescr; + + /* Get the current state. */ + fpescr = fegetenv_register (); + + *flagp = fpescr & SPEFSCR_ALL_EXCEPT; + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fegetexceptflag, __old_fegetexceptflag) +compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/flt-rounds.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/flt-rounds.c new file mode 100644 index 0000000000..4fb8d034c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/flt-rounds.c @@ -0,0 +1,39 @@ +/* Return current rounding mode as correct value for FLT_ROUNDS. e500 + version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv_libc.h> +#include <stdlib.h> + +int +__flt_rounds (void) +{ + switch (fegetenv_register () & SPEFSCR_FRMC) + { + case FE_TOWARDZERO: + return 0; + case FE_TONEAREST: + return 1; + case FE_UPWARD: + return 2; + case FE_DOWNWARD: + return 3; + default: + abort (); + } +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c new file mode 100644 index 0000000000..ef35a9426d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c @@ -0,0 +1,25 @@ +/* Raise given exceptions. e500 version for use from soft-fp. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Aldy Hernandez <aldyh@redhat.com>, 2004. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> +#include <libc-symbols.h> + +#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_soft +#include "spe-raise.c" +libc_hidden_def (__feraiseexcept_soft) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c new file mode 100644 index 0000000000..915642a1dc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c @@ -0,0 +1,41 @@ +/* Raise given exceptions. e500 version. 
+ Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_spe +#include "spe-raise.c" + +libm_hidden_def (__feraiseexcept_spe) + +#undef feraiseexcept +int +__feraiseexcept (int excepts) +{ + return __feraiseexcept_spe (__fexcepts_to_spe (excepts)); +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__feraiseexcept, __old_feraiseexcept) +compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1); +#endif + +libm_hidden_def (__feraiseexcept) +libm_hidden_ver (__feraiseexcept, feraiseexcept) +versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c new file mode 100644 index 0000000000..f1e6a02681 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c @@ -0,0 +1,55 @@ +/* Set floating-point environment exception handling. e500 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + unsigned long old_spefscr, spefscr; + fexcept_t flag; + int excepts_spe = __fexcepts_to_spe (excepts); + + /* Get the current state. */ + old_spefscr = fegetenv_register (); + + /* Ignore exceptions not listed in 'excepts'. */ + flag = *flagp & excepts_spe; + + /* Replace the exception status */ + spefscr = (old_spefscr & ~excepts_spe) | flag; + + /* Store the new status word (along with the rest of the environment). */ + fesetenv_register (spefscr); + + /* If the state of the "invalid" or "underflow" flag has changed, + inform the kernel. */ + if (((spefscr ^ old_spefscr) & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) != 0) + __fe_note_change (); + + /* Success. 
*/ + return 0; +} + +#include <shlib-compat.h> +#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2) +strong_alias (__fesetexceptflag, __old_fesetexceptflag) +compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1); +#endif + +versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c new file mode 100644 index 0000000000..05040d7224 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c @@ -0,0 +1,31 @@ +/* Test exception in current environment. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +fetestexcept (int excepts) +{ + unsigned long f; + + /* Get the current state. 
*/ + f = fegetenv_register (); + + return __fexcepts_from_spe (f) & excepts; +} +libm_hidden_def (fetestexcept) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h new file mode 100644 index 0000000000..117e7331e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h @@ -0,0 +1,4 @@ +/* The generic version of get-rounding-mode.h using fpu_control.h, not + the one using the software rounding mode, is correct for e500. */ + +#include <sysdeps/generic/get-rounding-mode.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S new file mode 100644 index 0000000000..9d00b62923 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S @@ -0,0 +1,27 @@ +/* Floating-point absolute value. e500 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +ENTRY (__fabsf) +/* float [r3] fabsf (float [r3] x) ; */ + efsabs r3,r3 + blr +END (__fabsf) + +weak_alias (__fabsf, fabsf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c new file mode 100644 index 0000000000..cc13c67786 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c @@ -0,0 +1,53 @@ +/* Raise given exceptions, given the SPEFSCR bits for those exceptions. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv_libc.h> + +int +__FERAISEEXCEPT_INTERNAL (int excepts) +{ + unsigned long f; + + f = fegetenv_register (); + f |= (excepts & SPEFSCR_ALL_EXCEPT); + fesetenv_register (f); + + /* Force the operations that cause the exceptions. */ + if ((SPEFSCR_FINVS & excepts) != 0) + /* 0 / 0 */ + asm volatile ("efsdiv %0,%0,%1" : : "r" (0), "r" (0)); + + if ((SPEFSCR_FDBZS & excepts) != 0) + /* 1.0 / 0.0 */ + asm volatile ("efsdiv %0,%0,%1" : : "r" (1.0F), "r" (0)); + + if ((SPEFSCR_FOVFS & excepts) != 0) + /* Largest normalized number plus itself. 
*/ + asm volatile ("efsadd %0,%0,%1" : : "r" (0x7f7fffff), "r" (0x7f7fffff)); + + if ((SPEFSCR_FUNFS & excepts) != 0) + /* Smallest normalized number times itself. */ + asm volatile ("efsmul %0,%0,%1" : : "r" (0x800000), "r" (0x800000)); + + if ((SPEFSCR_FINXS & excepts) != 0) + /* Smallest normalized minus 1.0 raises the inexact flag. */ + asm volatile ("efssub %0,%0,%1" : : "r" (0x00800000), "r" (1.0F)); + + /* Success. */ + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/Makefile new file mode 100644 index 0000000000..e05073970d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),misc) +sysdep_routines += fprsave fprrest +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S new file mode 100644 index 0000000000..c01c94dfb7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S @@ -0,0 +1,178 @@ +/* longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + + .machine "altivec" +ENTRY (__longjmp_symbol) +#ifndef __NO_VMX__ +# ifdef PIC + mflr r6 + cfi_register (lr,r6) + SETUP_GOT_ACCESS(r5,got_label) + addis r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@l +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + lwz r5,_rtld_local_ro@got(r5) +# else + lwz r5,_rtld_global_ro@got(r5) +# endif + mtlr r6 + cfi_same_value (lr) + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) +# else + lwz r5,_dl_hwcap@got(r5) + mtlr r6 + cfi_same_value (lr) + lwz r5,LOWORD(r5) +# endif +# else + lis r5,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r5) +# endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*4)(3) + andi. r6,r5,0xf + lwz r0,((JB_VRSAVE)*4)(3) + mtspr VRSAVE,r0 + beq+ L(aligned_restore_vmx) + addi r6,r5,16 + lvsl v0,0,r5 + lvx v1,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + vperm v20,v1,v21,v0 +# define load_misaligned_vmx_lo_loaded(loadvr,lovr,shiftvr,loadgpr,addgpr) \ + addi addgpr,addgpr,32; \ + lvx lovr,0,loadgpr; \ + vperm loadvr,loadvr,lovr,shiftvr; + load_misaligned_vmx_lo_loaded(v21,v22,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v22,v23,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v23,v24,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v24,v25,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v25,v26,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v26,v27,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v27,v28,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v28,v29,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v29,v30,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v30,v31,v0,r6,r5) + lvx v1,0,r5 + vperm v31,v31,v1,v0 + b L(no_vmx) +L(aligned_restore_vmx): + addi r6,r5,16 + lvx v20,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + addi r6,r6,32 + lvx v22,0,r5 + addi r5,r5,32 + lvx 
v23,0,r6 + addi r6,r6,32 + lvx v24,0,r5 + addi r5,r5,32 + lvx v25,0,r6 + addi r6,r6,32 + lvx v26,0,r5 + addi r5,r5,32 + lvx v27,0,r6 + addi r6,r6,32 + lvx v28,0,r5 + addi r5,r5,32 + lvx v29,0,r6 + addi r6,r6,32 + lvx v30,0,r5 + lvx v31,0,r6 +L(no_vmx): +#endif +#if defined PTR_DEMANGLE || defined CHECK_SP + lwz r24,(JB_GPR1*4)(r3) +# ifdef CHECK_SP +# ifdef PTR_DEMANGLE + PTR_DEMANGLE3 (r24, r24, r25) +# endif + CHECK_SP (r24) + mr r1,r24 +# endif +#else + lwz r1,(JB_GPR1*4)(r3) +#endif + lwz r0,(JB_LR*4)(r3) + lwz r14,((JB_GPRS+0)*4)(r3) + lfd fp14,((JB_FPRS+0*2)*4)(r3) + lwz r15,((JB_GPRS+1)*4)(r3) + lfd fp15,((JB_FPRS+1*2)*4)(r3) + lwz r16,((JB_GPRS+2)*4)(r3) + lfd fp16,((JB_FPRS+2*2)*4)(r3) + lwz r17,((JB_GPRS+3)*4)(r3) + lfd fp17,((JB_FPRS+3*2)*4)(r3) + lwz r18,((JB_GPRS+4)*4)(r3) + lfd fp18,((JB_FPRS+4*2)*4)(r3) + lwz r19,((JB_GPRS+5)*4)(r3) + lfd fp19,((JB_FPRS+5*2)*4)(r3) + lwz r20,((JB_GPRS+6)*4)(r3) + lfd fp20,((JB_FPRS+6*2)*4)(r3) +#ifdef PTR_DEMANGLE +# ifndef CHECK_SP + PTR_DEMANGLE3 (r1, r24, r25) +# endif + PTR_DEMANGLE2 (r0, r25) +#endif + /* longjmp/longjmp_target probe expects longjmp first argument (4@3), + second argument (-4@4), and target address (4@0), respectively. 
*/ + LIBC_PROBE (longjmp, 3, 4@3, -4@4, 4@0) + mtlr r0 + lwz r21,((JB_GPRS+7)*4)(r3) + lfd fp21,((JB_FPRS+7*2)*4)(r3) + lwz r22,((JB_GPRS+8)*4)(r3) + lfd fp22,((JB_FPRS+8*2)*4)(r3) + lwz r5,(JB_CR*4)(r3) + lwz r23,((JB_GPRS+9)*4)(r3) + lfd fp23,((JB_FPRS+9*2)*4)(r3) + lwz r24,((JB_GPRS+10)*4)(r3) + lfd fp24,((JB_FPRS+10*2)*4)(r3) + lwz r25,((JB_GPRS+11)*4)(r3) + lfd fp25,((JB_FPRS+11*2)*4)(r3) + mtcrf 0xFF,r5 + lwz r26,((JB_GPRS+12)*4)(r3) + lfd fp26,((JB_FPRS+12*2)*4)(r3) + lwz r27,((JB_GPRS+13)*4)(r3) + lfd fp27,((JB_FPRS+13*2)*4)(r3) + lwz r28,((JB_GPRS+14)*4)(r3) + lfd fp28,((JB_FPRS+14*2)*4)(r3) + lwz r29,((JB_GPRS+15)*4)(r3) + lfd fp29,((JB_FPRS+15*2)*4)(r3) + lwz r30,((JB_GPRS+16)*4)(r3) + lfd fp30,((JB_FPRS+16*2)*4)(r3) + lwz r31,((JB_GPRS+17)*4)(r3) + lfd fp31,((JB_FPRS+17*2)*4)(r3) + LIBC_PROBE (longjmp_target, 3, 4@3, -4@4, 4@0) + mr r3,r4 + blr +END (__longjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp.S new file mode 100644 index 0000000000..0e62245927 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/__longjmp.S @@ -0,0 +1,40 @@ +/* AltiVec/VMX (new) version of __longjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define __longjmp_symbol __longjmp +# include "__longjmp-common.S" + +#else /* IS_IN (libc) */ +strong_alias (__vmx__longjmp, __longjmp) +# define __longjmp_symbol __vmx__longjmp +# include "__longjmp-common.S" + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef JB_SIZE +# undef __longjmp_symbol +# define __longjmp_symbol __novmx__longjmp +# include "__longjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure new file mode 100644 index 0000000000..98c6f30ca3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure @@ -0,0 +1,56 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/powerpc/powerpc32/fpu. + +# Test whether integer to floating point conversions use fcfid. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fcfid use" >&5 +$as_echo_n "checking for fcfid use... " >&6; } +if ${libc_cv_ppc_fcfid+:} false; then : + $as_echo_n "(cached) " >&6 +else + echo 'double foo (int x) { return (double) x; }' > conftest.c +libc_cv_ppc_fcfid=no +if { ac_try='${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '[ ]fcfid' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fcfid=yes + fi +fi +rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc_fcfid" >&5 +$as_echo "$libc_cv_ppc_fcfid" >&6; } +if test $libc_cv_ppc_fcfid = yes; then + $as_echo "#define HAVE_PPC_FCFID 1" >>confdefs.h + +fi + +# Test whether floating point to long long conversions use fctidz. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fctidz use" >&5 +$as_echo_n "checking for fctidz use... " >&6; } +if ${libc_cv_ppc_fctidz+:} false; then : + $as_echo_n "(cached) " >&6 +else + echo 'long long int foo (double x) { return (long long int) x; }' > conftest.c +libc_cv_ppc_fctidz=no +if { ac_try='${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '[ ]fctidz' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fctidz=yes + fi +fi +rm -rf conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc_fctidz" >&5 +$as_echo "$libc_cv_ppc_fctidz" >&6; } +if test $libc_cv_ppc_fctidz = yes; then + $as_echo "#define HAVE_PPC_FCTIDZ 1" >>confdefs.h + +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure.ac b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure.ac new file mode 100644 index 0000000000..1899705aab --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/configure.ac @@ -0,0 +1,34 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/powerpc/powerpc32/fpu. + +# Test whether integer to floating point conversions use fcfid. 
+AC_CACHE_CHECK([for fcfid use], [libc_cv_ppc_fcfid], [dnl +echo 'double foo (int x) { return (double) x; }' > conftest.c +libc_cv_ppc_fcfid=no +if AC_TRY_COMMAND(${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +changequote(,)dnl + if grep '[ ]fcfid' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fcfid=yes + fi +changequote([,])dnl +fi +rm -rf conftest*]) +if test $libc_cv_ppc_fcfid = yes; then + AC_DEFINE([HAVE_PPC_FCFID]) +fi + +# Test whether floating point to long long conversions use fctidz. +AC_CACHE_CHECK([for fctidz use], [libc_cv_ppc_fctidz], [dnl +echo 'long long int foo (double x) { return (long long int) x; }' > conftest.c +libc_cv_ppc_fctidz=no +if AC_TRY_COMMAND(${CC-cc} -S $CFLAGS conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +changequote(,)dnl + if grep '[ ]fctidz' conftest.s > /dev/null 2>&1; then + libc_cv_ppc_fctidz=yes + fi +changequote([,])dnl +fi +rm -rf conftest*]) +if test $libc_cv_ppc_fctidz = yes; then + AC_DEFINE([HAVE_PPC_FCTIDZ]) +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fix-int-fp-convert-zero.h b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fix-int-fp-convert-zero.h new file mode 100644 index 0000000000..bd3eb62bf5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fix-int-fp-convert-zero.h @@ -0,0 +1,28 @@ +/* Fix for conversion of integer 0 to floating point. PowerPC version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef FIX_INT_FP_CONVERT_ZERO_H +#define FIX_INT_FP_CONVERT_ZERO_H 1 + +/* The code sequences GCC generates for conversion of integers to + floating point result in -0 instead of +0 in FE_DOWNWARD mode when + the fcfid instruction is not used, as of GCC 5. See + <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67771>. */ +#define FIX_INT_FP_CONVERT_ZERO (!HAVE_PPC_FCFID) + +#endif /* fix-int-fp-convert-zero.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprrest.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprrest.S new file mode 100644 index 0000000000..7b2346471b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprrest.S @@ -0,0 +1,94 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* + Floating Point Registers (FPRs) restore routine +*/ + +#include <sysdep.h> + +ENTRY(_restfpr_all) + .globl C_TEXT(_restf14) + .globl C_TEXT(_restfpr_14) +C_TEXT(_restf14): +C_TEXT(_restfpr_14): lfd fp14,-144(r1) + .globl C_TEXT(_restf15) + .globl C_TEXT(_restfpr_15) +C_TEXT(_restf15): +C_TEXT(_restfpr_15): lfd fp15,-136(r1) + .globl C_TEXT(_restf16) + .globl C_TEXT(_restfpr_16) +C_TEXT(_restf16): +C_TEXT(_restfpr_16): lfd fp16,-128(r1) + .globl C_TEXT(_restf17) + .globl C_TEXT(_restfpr_17) +C_TEXT(_restf17): +C_TEXT(_restfpr_17): lfd fp17,-120(r1) + .globl C_TEXT(_restf18) + .globl C_TEXT(_restfpr_18) +C_TEXT(_restf18): +C_TEXT(_restfpr_18): lfd fp18,-112(r1) + .globl C_TEXT(_restf19) + .globl C_TEXT(_restfpr_19) +C_TEXT(_restf19): +C_TEXT(_restfpr_19): lfd fp19,-104(r1) + .globl C_TEXT(_restf20) + .globl C_TEXT(_restfpr_20) +C_TEXT(_restf20): +C_TEXT(_restfpr_20): lfd fp20,-96(r1) + .globl C_TEXT(_restf21) + .globl C_TEXT(_restfpr_21) +C_TEXT(_restf21): +C_TEXT(_restfpr_21): lfd fp21,-88(r1) + .globl C_TEXT(_restf22) + .globl C_TEXT(_restfpr_22) +C_TEXT(_restf22): +C_TEXT(_restfpr_22): lfd fp22,-80(r1) + .globl C_TEXT(_restf23) + .globl C_TEXT(_restfpr_23) +C_TEXT(_restf23): +C_TEXT(_restfpr_23): lfd fp23,-72(r1) + .globl C_TEXT(_restf24) + .globl C_TEXT(_restfpr_24) +C_TEXT(_restf24): +C_TEXT(_restfpr_24): lfd fp24,-64(r1) + .globl C_TEXT(_restf25) + .globl C_TEXT(_restfpr_25) +C_TEXT(_restf25): +C_TEXT(_restfpr_25): lfd fp25,-56(r1) + .globl C_TEXT(_restf26) + .globl C_TEXT(_restfpr_26) +C_TEXT(_restf26): +C_TEXT(_restfpr_26): lfd fp26,-48(r1) + .globl C_TEXT(_restf27) + .globl C_TEXT(_restfpr_27) +C_TEXT(_restf27): +C_TEXT(_restfpr_27): lfd fp27,-40(r1) + .globl C_TEXT(_restf28) + .globl C_TEXT(_restfpr_28) +C_TEXT(_restf28): +C_TEXT(_restfpr_28): lfd fp28,-32(r1) + .globl C_TEXT(_restf29) + .globl C_TEXT(_restfpr_29) +C_TEXT(_restf29): +C_TEXT(_restfpr_29): lwz r0,8(r1) #get return address from frame + lfd fp29,-24(r1) #restore f29 + mtlr r0 #move 
return address to LR + lfd fp30,-16(r1) #restore f30 + lfd fp31,-8(r1) #restore f31 + blr #return +END (_restfpr_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprsave.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprsave.S new file mode 100644 index 0000000000..975a8216cb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/fprsave.S @@ -0,0 +1,111 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* + Floating Point Registers (FPRs) save routine +*/ + +#include <sysdep.h> + +ENTRY(_savefpr_all) + .globl C_TEXT(_savef14) + .globl C_TEXT(_savefpr_14) +C_TEXT(_savef14): +C_TEXT(_savefpr_14): stfd fp14,-144(r1) + cfi_offset(fp14,-144) + .globl C_TEXT(_savef15) + .globl C_TEXT(_savefpr_15) +C_TEXT(_savef15): +C_TEXT(_savefpr_15): stfd fp15,-136(r1) + cfi_offset(fp15,-136) + .globl C_TEXT(_savef16) + .globl C_TEXT(_savefpr_16) +C_TEXT(_savef16): +C_TEXT(_savefpr_16): stfd fp16,-128(r1) + cfi_offset(fp16,-128) + .globl C_TEXT(_savef17) + .globl C_TEXT(_savefpr_17) +C_TEXT(_savef17): +C_TEXT(_savefpr_17): stfd fp17,-120(r1) + cfi_offset(fp17,-120) + .globl C_TEXT(_savef18) + .globl C_TEXT(_savefpr_18) +C_TEXT(_savef18): +C_TEXT(_savefpr_18): stfd fp18,-112(r1) + cfi_offset(fp18,-112) + .globl C_TEXT(_savef19) + .globl C_TEXT(_savefpr_19) +C_TEXT(_savef19): +C_TEXT(_savefpr_19): stfd fp19,-104(r1) + cfi_offset(fp19,-104) + .globl C_TEXT(_savef20) + .globl C_TEXT(_savefpr_20) +C_TEXT(_savef20): +C_TEXT(_savefpr_20): stfd fp20,-96(r1) + cfi_offset(fp20,-96) + .globl C_TEXT(_savef21) + .globl C_TEXT(_savefpr_21) +C_TEXT(_savef21): +C_TEXT(_savefpr_21): stfd fp21,-88(r1) + cfi_offset(fp21,-88) + .globl C_TEXT(_savef22) + .globl C_TEXT(_savefpr_22) +C_TEXT(_savef22): +C_TEXT(_savefpr_22): stfd fp22,-80(r1) + cfi_offset(fp22,-80) + .globl C_TEXT(_savef23) + .globl C_TEXT(_savefpr_23) +C_TEXT(_savef23): +C_TEXT(_savefpr_23): stfd fp23,-72(r1) + cfi_offset(fp23,-72) + .globl C_TEXT(_savef24) + .globl C_TEXT(_savefpr_24) +C_TEXT(_savef24): +C_TEXT(_savefpr_24): stfd fp24,-64(r1) + cfi_offset(fp24,-64) + .globl C_TEXT(_savef25) + .globl C_TEXT(_savefpr_25) +C_TEXT(_savef25): +C_TEXT(_savefpr_25): stfd fp25,-56(r1) + cfi_offset(fp25,-56) + .globl C_TEXT(_savef26) + .globl C_TEXT(_savefpr_26) +C_TEXT(_savef26): +C_TEXT(_savefpr_26): stfd fp26,-48(r1) + cfi_offset(fp26,-48) + .globl C_TEXT(_savef27) + .globl C_TEXT(_savefpr_27) +C_TEXT(_savef27): +C_TEXT(_savefpr_27): stfd 
fp27,-40(r1) + cfi_offset(fp27,-40) + .globl C_TEXT(_savef28) + .globl C_TEXT(_savefpr_28) +C_TEXT(_savef28): +C_TEXT(_savefpr_28): stfd fp28,-32(r1) + cfi_offset(fp28,-32) + .globl C_TEXT(_savef29) + .globl C_TEXT(_savefpr_29) +C_TEXT(_savef29): +C_TEXT(_savefpr_29): stfd fp29,-24(r1) #save f29 + stfd fp30,-16(r1) #save f30 + stfd fp31,-8(r1) #save f31 + cfi_offset(fp29,-24) + cfi_offset(fp30,-16) + cfi_offset(fp31,-8) + stw r0,8(r1) #save LR in callers frame + blr #return +END (_savefpr_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceil.S new file mode 100644 index 0000000000..51b8c21027 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceil.S @@ -0,0 +1,83 @@ +/* ceil function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + + .section ".text" +ENTRY (__ceil) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S new file mode 100644 index 0000000000..9d8d8aa294 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S @@ -0,0 +1,75 @@ +/* float ceil function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + + .section ".text" +ENTRY (__ceilf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. 
*/ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysign.S new file mode 100644 index 0000000000..850dded3b6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysign.S @@ -0,0 +1,59 @@ +/* Copy a sign bit between floating-point values. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysign) +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + stfd fp2,8(r1) + lwz r3,8+HIWORD(r1) + cmpwi r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) + blt L(0) + fabs fp1,fp1 + blr +L(0): fnabs fp1,fp1 + blr + END (__copysign) + +weak_alias (__copysign,copysign) + +/* It turns out that it's safe to use this code even for single-precision. 
*/ +weak_alias (__copysign,copysignf) +strong_alias(__copysign,__copysignf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0) +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignf.S new file mode 100644 index 0000000000..e05438ae7d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* __copysignf is in s_copysign.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S new file mode 100644 index 0000000000..272032b49e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S @@ -0,0 +1,66 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysignl) +/* long double [f1,f2] copysign (long double [f1,f2] x, long double [f3,f4] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ +#ifdef _ARCH_PPCGR + /* fsel available. */ + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + stfd fp3,8(r1) + fmr fp0,fp1 + fabs fp1,fp1 + lwz r3,8+HIWORD(r1) + cmpwi cr6,r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 + bgelr cr6 + fneg fp1,fp1 + fneg fp2,fp2 + blr +#else + stwu r1,-32(r1) + cfi_adjust_cfa_offset (32) + stfd fp3,8(r1) + stfd fp1,16(r1) + lwz r3,8+HIWORD(r1) + lwz r4,16+HIWORD(r1) + xor r3,r3,r4 + cmpwi cr6,r3,0 + addi r1,r1,32 + cfi_adjust_cfa_offset (-32) + bgelr cr6 + fneg fp1,fp1 + fneg fp2,fp2 + blr +#endif +END (__copysignl) + +#if IS_IN (libm) +long_double_symbol (libm, __copysignl, copysignl) +#else +long_double_symbol (libc, __copysignl, copysignl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabs.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabs.S new file mode 100644 index 0000000000..53d21301ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabs.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fabs.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __fabs, fabsl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabsl.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabsl.S new file mode 100644 index 0000000000..75608ec70c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fabsl.S @@ -0,0 +1,52 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fabsl) +/* long double [f1,f2] fabs (long double [f1,f2] x); + fabs(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ +#ifdef _ARCH_PPCGR + /* fsel available. */ + fmr fp0,fp1 +#else + /* Use integer operations to test sign of high part to avoid + exceptions on sNaNs. */ + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + stfd fp1,8(r1) +#endif + fabs fp1,fp1 +#ifdef _ARCH_PPCGR + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 +#else + lwz r3,8+HIWORD(r1) + cmpwi cr6,r3,0 + addi r1,r1,16 + cfi_adjust_cfa_offset (-16) + bgelr cr6 + fneg fp2,fp2 +#endif + blr +END (__fabsl) + +long_double_symbol (libm, __fabsl, fabsl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floor.S new file mode 100644 index 0000000000..90a1b184df --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floor.S @@ -0,0 +1,83 @@ +/* Floor function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + + .section ".text" +ENTRY (__floor) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. 
*/ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floorf.S new file mode 100644 index 0000000000..b87e3bf33e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_floorf.S @@ -0,0 +1,75 @@ +/* float Floor function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + + .section ".text" +ENTRY (__floorf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. 
*/ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fma.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fma.S new file mode 100644 index 0000000000..d40695c633 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_fma.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fma.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fma, fmal, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_isnan.S new file mode 100644 index 0000000000..363535dcdb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_isnan.S @@ -0,0 +1,57 @@ +/* isnan(). PowerPC32 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power4 +EALIGN (__isnan, 4, 0) + mffs fp0 + mtfsb0 4*cr6+lt /* reset_fpscr_bit (FPSCR_VE) */ + fcmpu cr7,fp1,fp1 + mtfsf 255,fp0 + li r3,0 + beqlr+ cr7 /* (x == x) then not a NAN */ + li r3,1 /* else must be a NAN */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +#ifndef __isnan +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) +#endif + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrint.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrint.c new file mode 100644 index 0000000000..13d150cd68 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrint.c @@ -0,0 +1,63 @@ +/* Round a double value to a long long in the current rounding mode. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <limits.h> +#include <math.h> +#include <math_ldbl_opt.h> +#include <math_private.h> +#include <stdint.h> + +long long int +__llrint (double x) +{ + double rx = __rint (x); + if (HAVE_PPC_FCTIDZ || rx != x) + return (long long int) rx; + else + { + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (fabs (rx) < 0x1p31) + return (long long int) (long int) rx; + uint64_t i0; + EXTRACT_WORDS64 (i0, rx); + int exponent = ((i0 >> 52) & 0x7ff) - 0x3ff; + if (exponent < 63) + { + unsigned long long int mant + = (i0 & ((1ULL << 52) - 1)) | (1ULL << 52); + if (exponent < 52) + mant >>= 52 - exponent; + else + mant <<= exponent - 52; + return (long long int) ((i0 & (1ULL << 63)) != 0 ? -mant : mant); + } + else if (rx == (double) LLONG_MIN) + return LLONG_MIN; + else + return (long long int) (long int) rx << 32; + } +} +weak_alias (__llrint, llrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrintf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrintf.c new file mode 100644 index 0000000000..46365452a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llrintf.c @@ -0,0 +1,46 @@ +/* Round a float value to a long long in the current rounding mode. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +long long int +__llrintf (float x) +{ + float rx = __rintf (x); + if (HAVE_PPC_FCTIDZ || rx != x) + return (long long int) rx; + else + { + float arx = fabsf (rx); + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (arx < 0x1p31f) + return (long long int) (long int) rx; + else if (!(arx < 0x1p55f)) + return (long long int) (long int) (rx * 0x1p-32f) << 32; + uint32_t i0; + GET_FLOAT_WORD (i0, rx); + int exponent = ((i0 >> 23) & 0xff) - 0x7f; + unsigned long long int mant = (i0 & 0x7fffff) | 0x800000; + mant <<= exponent - 23; + return (long long int) ((i0 & 0x80000000) != 0 ? -mant : mant); + } +} +weak_alias (__llrintf, llrintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llround.c new file mode 100644 index 0000000000..5e5a237b0c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llround.c @@ -0,0 +1,90 @@ +/* Round double value to long long int. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <limits.h> +#include <math.h> +#include <math_ldbl_opt.h> +#include <math_private.h> +#include <stdint.h> + +/* Round to the nearest integer, with values exactly on a 0.5 boundary + rounded away from zero, regardless of the current rounding mode. + If (long long)x, when x is out of range of a long long, clips at + LLONG_MAX or LLONG_MIN, then this implementation also clips. */ + +long long int +__llround (double x) +{ + long long xr; + if (HAVE_PPC_FCTIDZ) + xr = (long long) x; + else + { + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (fabs (x) < 0x1p31) + xr = (long long int) (long int) x; + else + { + uint64_t i0; + EXTRACT_WORDS64 (i0, x); + int exponent = ((i0 >> 52) & 0x7ff) - 0x3ff; + if (exponent < 63) + { + unsigned long long int mant + = (i0 & ((1ULL << 52) - 1)) | (1ULL << 52); + if (exponent < 52) + /* llround is not required to raise "inexact". */ + mant >>= 52 - exponent; + else + mant <<= exponent - 52; + xr = (long long int) ((i0 & (1ULL << 63)) != 0 ? 
-mant : mant); + } + else if (x == (double) LLONG_MIN) + xr = LLONG_MIN; + else + xr = (long long int) (long int) x << 32; + } + } + /* Avoid spurious "inexact" converting LLONG_MAX to double, and from + subtraction when the result is out of range, by returning early + for arguments large enough that no rounding is needed. */ + if (!(fabs (x) < 0x1p52)) + return xr; + double xrf = (double) xr; + + if (x >= 0.0) + { + if (x - xrf >= 0.5) + xr += (long long) ((unsigned long long) xr + 1) > 0; + } + else + { + if (xrf - x >= 0.5) + xr -= (long long) ((unsigned long long) xr - 1) < 0; + } + return xr; +} +weak_alias (__llround, llround) +#ifdef NO_LONG_DOUBLE +strong_alias (__llround, __llroundl) +weak_alias (__llround, llroundl) +#endif +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llroundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llroundf.c new file mode 100644 index 0000000000..55452bac73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_llroundf.c @@ -0,0 +1,72 @@ +/* Round float value to long long int. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_private.h> +#include <stdint.h> + +/* Round to the nearest integer, with values exactly on a 0.5 boundary + rounded away from zero, regardless of the current rounding mode. + If (long long)x, when x is out of range of a long long, clips at + LLONG_MAX or LLONG_MIN, then this implementation also clips. */ + +long long int +__llroundf (float x) +{ + long long xr; + if (HAVE_PPC_FCTIDZ) + xr = (long long) x; + else + { + float ax = fabsf (x); + /* Avoid incorrect exceptions from libgcc conversions (as of GCC + 5): <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59412>. */ + if (ax < 0x1p31f) + xr = (long long int) (long int) x; + else if (!(ax < 0x1p55f)) + xr = (long long int) (long int) (x * 0x1p-32f) << 32; + else + { + uint32_t i0; + GET_FLOAT_WORD (i0, x); + int exponent = ((i0 >> 23) & 0xff) - 0x7f; + unsigned long long int mant = (i0 & 0x7fffff) | 0x800000; + mant <<= exponent - 23; + xr = (long long int) ((i0 & 0x80000000) != 0 ? -mant : mant); + } + } + /* Avoid spurious "inexact" converting LLONG_MAX to float, and from + subtraction when the result is out of range, by returning early + for arguments large enough that no rounding is needed. */ + if (!(fabsf (x) < 0x1p23f)) + return xr; + float xrf = (float) xr; + + if (x >= 0.0) + { + if (x - xrf >= 0.5) + xr += (long long) ((unsigned long long) xr + 1) > 0; + } + else + { + if (xrf - x >= 0.5) + xr -= (long long) ((unsigned long long) xr - 1) < 0; + } + return xr; +} +weak_alias (__llroundf, llroundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lrint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lrint.S new file mode 100644 index 0000000000..8d54d95b6a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lrint.S @@ -0,0 +1,46 @@ +/* Round double to long int. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long int[r3] __lrint (double x[fp1]) */ +ENTRY (__lrint) + stwu r1,-16(r1) + fctiw fp13,fp1 + stfd fp13,8(r1) + nop /* Ensure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__lrint) + +weak_alias (__lrint, lrint) + +strong_alias (__lrint, __lrintf) +weak_alias (__lrint, lrintf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lround.S new file mode 100644 index 0000000000..e4ec1bb0b6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lround.S @@ -0,0 +1,129 @@ +/* lround function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 0.5 */ + .long 0x3f000000 +.LC1: /* 2^52. */ + .long 0x59800000 + .section .rodata.cst8,"aM",@progbits,8 + .align 3 +.LC2: /* 0x7fffffff.8p0. */ + .long 0x41dfffff + .long 0xffe00000 +.LC3: /* -0x80000000.8p0. */ + .long 0xc1e00000 + .long 0x00100000 + .section ".text" + +/* long [r3] lround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. It is necessary to detect when x is + (+-)0x1.fffffffffffffp-2 because adding +-0.5 in this case will + cause an erroneous shift, carry and round. We simply return 0 if + 0.5 > x > -0.5. 
*/ + +ENTRY (__lround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r10,r9,.LC0-got_label@ha + lfs fp10,.LC0-got_label@l(r10) + addis r10,r9,.LC1-got_label@ha + lfs fp11,.LC1-got_label@l(r10) + addis r10,r9,.LC2-got_label@ha + lfd fp9,.LC2-got_label@l(r10) + addis r10,r9,.LC3-got_label@ha + lfd fp8,.LC3-got_label@l(r10) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp10,.LC0@l(r9) + lis r9,.LC1@ha + lfs fp11,.LC1@l(r9) + lis r9,.LC2@ha + lfd fp9,.LC2@l(r9) + lis r9,.LC3@ha + lfd fp8,.LC3@l(r9) +#endif + fabs fp2, fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0. */ + fcmpu cr6, fp2, fp10 /* if |x| < 0.5 */ + fcmpu cr5, fp1, fp9 /* if x >= 0x7fffffff.8p0 */ + fcmpu cr1, fp1, fp8 /* if x <= -0x80000000.8p0 */ + fcmpu cr7, fp1, fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero + bge- cr5,.Loflow + ble- cr1,.Loflow + /* Test whether an integer to avoid spurious "inexact". */ + fadd fp3,fp2,fp11 + fsub fp3,fp3,fp11 + fcmpu cr5, fp2, fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr7,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctiwz fp4,fp3 /* Convert to Integer word lround toward 0. */ + stfd fp4,8(r1) + nop /* Ensure the following load is in a different dispatch */ + nop /* group to avoid pipe stall on POWER4&5. */ + nop + lwz r3,8+LOWORD(r1) /* Load return as integer. */ +.Lout: + addi r1,r1,16 + blr +.Lretzero: /* when 0.5 > x > -0.5 */ + li r3,0 /* return 0. 
*/ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert +.Loflow: + fmr fp3,fp11 + bge cr7,.Lconvert + fnabs fp3,fp3 + b .Lconvert + END (__lround) + +weak_alias (__lround, lround) + +strong_alias (__lround, __lroundf) +weak_alias (__lround, lroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S new file mode 100644 index 0000000000..6289e0be58 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_lroundf.S @@ -0,0 +1 @@ +/* __lroundf is in s_lround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S new file mode 100644 index 0000000000..df590e08bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyint.S @@ -0,0 +1,87 @@ +/* Round to int floating-point values. PowerPC32 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + +/* double [fp1] nearbyint(double [fp1] x) */ + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 /* TWO52: 2**52 */ + + .section ".text" +ENTRY (__nearbyint) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + bcl 20,31,1f +1: mflr r9 + addis r9,r9,.LC0-1b@ha + lfs fp13,.LC0-1b@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52 */ + bge cr7,.L10 + fcmpu cr7,fp1,fp12 /* if (x > 0.0 */ + ble cr7,L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fabs fp1,fp1 /* if (x == 0.0 */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr +L(lessthanzero): + bgelr cr7 + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr +END (__nearbyint) + +weak_alias (__nearbyint, nearbyint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__nearbyint, nearbyintl) +strong_alias (__nearbyint, __nearbyintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S new file mode 100644 index 0000000000..fb4c6e4cee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_nearbyintf.S @@ -0,0 +1,78 @@ +/* Round to int floating-point values. PowerPC32 version. 
+ Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> + + +/* float [fp1] nearbyintf(float [fp1] x) */ + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: + .long 0x4B000000 /* TWO23: 2**23 */ + + .section ".text" +ENTRY (__nearbyintf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + bcl 20,31,1f +1: mflr r9 + addis r9,r9,.LC0-1b@ha + lfs fp13,.LC0-1b@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23 */ + bge cr7,.L10 + fcmpu cr7,fp1,fp12 /* if (x > 0.0 */ + ble cr7,L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. 
*/ + blr +L(lessthanzero): + bgelr cr7 + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr +END (__nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rint.S new file mode 100644 index 0000000000..a1c3116447 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rint.S @@ -0,0 +1,76 @@ +/* Round to int floating-point values. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + + .section ".text" +ENTRY (__rint) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__rint) + +weak_alias (__rint, rint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__rint, rintl) +strong_alias (__rint, __rintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rintf.S new file mode 100644 index 0000000000..70e52e894d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_rintf.S @@ -0,0 +1,65 @@ +/* Round float to int floating-point values. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + + .section ".text" +ENTRY (__rintf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__rintf) + +weak_alias (__rintf, rintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_round.S new file mode 100644 index 0000000000..f539890b17 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_round.S @@ -0,0 +1,104 @@ +/* round function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	2
+/* NOTE(review): section is .rodata.cst8 but alignment is only .align 2;
+   the SHARED path below loads .LC1 as .LC1-.LC0(r9), i.e. it assumes
+   .LC1 sits exactly 4 bytes after .LC0 -- confirm linker keeps them
+   adjacent.  */
+.LC0:	/* 2**52 (single-precision constant; loaded with lfs) */
+	.long 0x59800000
+.LC1:	/* 0.5 */
+	.long 0x3f000000
+
+/* double [fp1] round (double x [fp1])
+   IEEE 1003.1 round function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However PowerPC Architecture defines
+   "Round to Nearest" as "Choose the best approximation. In case of a
+   tie, choose the one that is even (least significant bit 0, i.e. the
+   even value).".
+   So we can't use the PowerPC "Round to Nearest" mode. Instead we set
+   "Round toward Zero" mode and round by adding +-0.5 before rounding
+   to the integer value.  */
+
+	.section	".text"
+ENTRY (__round)
+#ifdef SHARED
+	/* PIC: temporarily clobber LR to compute the GOT-relative address
+	   of .LC0, then restore it.  r9 keeps pointing at .LC0 so .LC1 can
+	   be reached as .LC1-.LC0(r9) below.  */
+	mflr	r11
+	cfi_register(lr,r11)
+	SETUP_GOT_ACCESS(r9,got_label)
+	addis	r9,r9,.LC0-got_label@ha
+	addi	r9,r9,.LC0-got_label@l
+	mtlr	r11
+	cfi_same_value (lr)
+	lfs	fp13,0(r9)
+#else
+	lis	r9,.LC0@ha
+	lfs	fp13,.LC0@l(r9)
+#endif
+	fabs	fp0,fp1
+	fsub	fp12,fp13,fp13	/* generate 0.0  */
+	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
+	mffs	fp11		/* Save current FPU rounding mode and
+				   "inexact" state.  */
+	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
+	bnl-	cr7,.L10	/* |x| >= TWO52 or NaN: already integral (or NaN)  */
+	mtfsfi	7,1		/* Set rounding mode toward 0.  */
+#ifdef SHARED
+	lfs	fp10,.LC1-.LC0(r9)
+#else
+	lis	r9,.LC1@ha
+	lfs	fp10,.LC1@l(r9)
+#endif
+	ble-	cr6,.L4
+	fadd	fp1,fp1,fp10	/* x+= 0.5;  */
+	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
+	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
+	fabs	fp1,fp1		/* if (x == 0.0)  */
+				/* x = 0.0;  */
+	mtfsf	0xff,fp11	/* Restore previous rounding mode and
+				   "inexact" state.  */
+	blr
+.L4:
+	fsub	fp9,fp1,fp10	/* x -= 0.5;  (negative branch: bias away
+				   from zero before truncating)  */
+	bge-	cr6,.L9		/* if (x < 0.0)  */
+	fsub	fp1,fp9,fp13	/* x-= TWO52;  */
+	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
+	fnabs	fp1,fp1		/* if (x == 0.0)  */
+				/* x = -0.0;  */
+.L9:
+	mtfsf	0xff,fp11	/* Restore previous rounding mode and
+				   "inexact" state.  */
+	blr
+.L10:
+	/* Ensure sNaN input is converted to qNaN.  */
+	fcmpu	cr7,fp1,fp1
+	beqlr	cr7
+	fadd	fp1,fp1,fp1
+	blr
+	END (__round)
+
+weak_alias (__round, round)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__round, roundl)
+strong_alias (__round, __roundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __round, roundl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
new file mode 100644
index 0000000000..5daf84b598
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -0,0 +1,95 @@
+/* roundf function.  PowerPC32 version.
+   Copyright (C) 2004-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst8,"aM",@progbits,8 + .align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 +.LC1: /* 0.5 */ + .long 0x3f000000 + +/* float [fp1] roundf (float x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + + .section ".text" +ENTRY (__roundf ) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + addi r9,r9,.LC0-got_label@l + mtlr r11 + cfi_same_value (lr) + lfs fp13,0(r9) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ +#ifdef SHARED + lfs fp10,.LC1-.LC0(r9) +#else + lfs fp10,.LC1@l(r9) +#endif + ble- cr6,.L4 + fadds fp1,fp1,fp10 /* x+= 0.5; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. 
*/ + blr +.L4: + fsubs fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp9,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_trunc.S new file mode 100644 index 0000000000..85d292c03c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_trunc.S @@ -0,0 +1,90 @@ +/* trunc function. PowerPC32 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**52 */ + .long 0x59800000 + +/* double [fp1] trunc (double x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**52 then + subtracting +-2**52. 
 */
+
+	.section	".text"
+/* Truncate toward zero: force FPSCR[RN] to round-to-zero, then add and
+   subtract TWO52 so the FPU discards the fraction; the saved FPSCR is
+   restored afterwards so the caller's mode and "inexact" flag survive.  */
+ENTRY (__trunc)
+#ifdef SHARED
+	/* PIC: temporarily clobber LR to form the GOT-relative address of
+	   .LC0, then restore it.  */
+	mflr	r11
+	cfi_register(lr,r11)
+	SETUP_GOT_ACCESS(r9,got_label)
+	addis	r9,r9,.LC0-got_label@ha
+	lfs	fp13,.LC0-got_label@l(r9)
+	mtlr	r11
+	cfi_same_value (lr)
+#else
+	lis	r9,.LC0@ha
+	lfs	fp13,.LC0@l(r9)
+#endif
+	fabs	fp0,fp1
+	fsub	fp12,fp13,fp13	/* generate 0.0  */
+	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
+	mffs	fp11		/* Save current FPU rounding mode and
+				   "inexact" state.  */
+	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
+	bnl-	cr7,.L10	/* |x| >= TWO52 or NaN: already integral (or NaN)  */
+	mtfsfi	7,1		/* Set rounding toward 0 mode.  */
+	ble-	cr6,.L4
+	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
+	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
+	fabs	fp1,fp1		/* if (x == 0.0)  */
+				/* x = 0.0;  */
+	mtfsf	0xff,fp11	/* Restore previous rounding mode and
+				   "inexact" state.  */
+	blr
+.L4:
+	bge-	cr6,.L9		/* if (x < 0.0)  */
+	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
+	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
+	fnabs	fp1,fp1		/* if (x == 0.0)  */
+				/* x = -0.0;  */
+.L9:
+	mtfsf	0xff,fp11	/* Restore previous rounding mode and
+				   "inexact" state.  */
+	blr
+.L10:
+	/* Ensure sNaN input is converted to qNaN.  */
+	fcmpu	cr7,fp1,fp1
+	beqlr	cr7
+	fadd	fp1,fp1,fp1
+	blr
+	END (__trunc)
+
+weak_alias (__trunc, trunc)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__trunc, truncl)
+strong_alias (__trunc, __truncl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __trunc, truncl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
new file mode 100644
index 0000000000..9b91e3f0a5
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
@@ -0,0 +1,82 @@
+/* truncf function.  PowerPC32 version.
+   Copyright (C) 2004-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .rodata.cst4,"aM",@progbits,4 + .align 2 +.LC0: /* 2**23 */ + .long 0x4b000000 + +/* float [fp1] truncf (float x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**23 then + subtracting +-2**23. */ + + .section ".text" +ENTRY (__truncf) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + lfs fp13,.LC0-got_label@l(r9) + mtlr r11 + cfi_same_value (lr) +#else + lis r9,.LC0@ha + lfs fp13,.LC0@l(r9) +#endif + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. 
*/ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S new file mode 100644 index 0000000000..19ad07ee56 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S @@ -0,0 +1,183 @@ +/* setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + + .machine "altivec" +ENTRY (__sigsetjmp_symbol) + +#ifdef PTR_MANGLE + mr r5,r1 + PTR_MANGLE(r5, r6) + stw r5,(JB_GPR1*4)(3) +#else + stw r1,(JB_GPR1*4)(3) +#endif + mflr r0 + /* setjmp probe expects longjmp first argument (4@3), second argument + (-4@4), and target address (4@0), respectively. 
*/ + LIBC_PROBE (setjmp, 3, 4@3, -4@4, 4@0) + stw r14,((JB_GPRS+0)*4)(3) + stfd fp14,((JB_FPRS+0*2)*4)(3) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r6) +#endif + stw r0,(JB_LR*4)(3) + stw r15,((JB_GPRS+1)*4)(3) + stfd fp15,((JB_FPRS+1*2)*4)(3) + mfcr r0 + stw r16,((JB_GPRS+2)*4)(3) + stfd fp16,((JB_FPRS+2*2)*4)(3) + stw r0,(JB_CR*4)(3) + stw r17,((JB_GPRS+3)*4)(3) + stfd fp17,((JB_FPRS+3*2)*4)(3) + stw r18,((JB_GPRS+4)*4)(3) + stfd fp18,((JB_FPRS+4*2)*4)(3) + stw r19,((JB_GPRS+5)*4)(3) + stfd fp19,((JB_FPRS+5*2)*4)(3) + stw r20,((JB_GPRS+6)*4)(3) + stfd fp20,((JB_FPRS+6*2)*4)(3) + stw r21,((JB_GPRS+7)*4)(3) + stfd fp21,((JB_FPRS+7*2)*4)(3) + stw r22,((JB_GPRS+8)*4)(3) + stfd fp22,((JB_FPRS+8*2)*4)(3) + stw r23,((JB_GPRS+9)*4)(3) + stfd fp23,((JB_FPRS+9*2)*4)(3) + stw r24,((JB_GPRS+10)*4)(3) + stfd fp24,((JB_FPRS+10*2)*4)(3) + stw r25,((JB_GPRS+11)*4)(3) + stfd fp25,((JB_FPRS+11*2)*4)(3) + stw r26,((JB_GPRS+12)*4)(3) + stfd fp26,((JB_FPRS+12*2)*4)(3) + stw r27,((JB_GPRS+13)*4)(3) + stfd fp27,((JB_FPRS+13*2)*4)(3) + stw r28,((JB_GPRS+14)*4)(3) + stfd fp28,((JB_FPRS+14*2)*4)(3) + stw r29,((JB_GPRS+15)*4)(3) + stfd fp29,((JB_FPRS+15*2)*4)(3) + stw r30,((JB_GPRS+16)*4)(3) + stfd fp30,((JB_FPRS+16*2)*4)(3) + stw r31,((JB_GPRS+17)*4)(3) + stfd fp31,((JB_FPRS+17*2)*4)(3) +#ifndef __NO_VMX__ +# ifdef PIC + mflr r6 + cfi_register(lr,r6) + SETUP_GOT_ACCESS(r5,got_label) + addis r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@l + mtlr r6 + cfi_same_value (lr) +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + lwz r5,_rtld_local_ro@got(r5) +# else + lwz r5,_rtld_global_ro@got(r5) +# endif + lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5) +# else + lwz r5,_dl_hwcap@got(r5) + lwz r5,LOWORD(r5) +# endif +# else + lis r6,(_dl_hwcap+LOWORD)@ha + lwz r5,(_dl_hwcap+LOWORD)@l(r6) +# endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*4)(3) + andi. 
r6,r5,0xf + mfspr r0,VRSAVE + stw r0,((JB_VRSAVE)*4)(3) + addi r6,r5,16 + beq+ L(aligned_save_vmx) + + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 + +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr + + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. + */ + + /* load and rotate data below v20 */ + lvx v2,0,r5 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v20,v2,v0,v3,r5,r6) + save_misaligned_vmx(v21,v20,v0,v3,r6,r5) + save_misaligned_vmx(v22,v21,v0,v3,r5,r6) + save_misaligned_vmx(v23,v22,v0,v3,r6,r5) + save_misaligned_vmx(v24,v23,v0,v3,r5,r6) + save_misaligned_vmx(v25,v24,v0,v3,r6,r5) + save_misaligned_vmx(v26,v25,v0,v3,r5,r6) + save_misaligned_vmx(v27,v26,v0,v3,r6,r5) + save_misaligned_vmx(v28,v27,v0,v3,r5,r6) + save_misaligned_vmx(v29,v28,v0,v3,r6,r5) + save_misaligned_vmx(v30,v29,v0,v3,r5,r6) + save_misaligned_vmx(v31,v30,v0,v3,r6,r5) + /* load and rotate data above v31 */ + lvx v2,0,r6 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v2,v31,v0,v3,r5,r6) + + b L(no_vmx) + +L(aligned_save_vmx): + stvx 20,0,r5 + addi r5,r5,32 + stvx 21,0,r6 + addi r6,r6,32 + stvx 22,0,r5 + addi r5,r5,32 + stvx 23,0,r6 + addi r6,r6,32 + stvx 24,0,r5 + addi r5,r5,32 + stvx 25,0,r6 + addi r6,r6,32 + stvx 26,0,r5 + addi r5,r5,32 + stvx 27,0,r6 + addi r6,r6,32 + stvx 28,0,r5 + addi r5,r5,32 + stvx 29,0,r6 + addi r6,r6,32 + stvx 30,0,r5 + stvx 31,0,r6 +L(no_vmx): +#endif + b __sigjmp_save_symbol@local +END (__sigsetjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp.S new file mode 100644 index 0000000000..02b17d3467 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/fpu/setjmp.S @@ -0,0 +1,47 @@ +/* non altivec (old) version of setjmp for 
PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define __sigsetjmp_symbol __sigsetjmp +# define __sigjmp_save_symbol __sigjmp_save +# include "setjmp-common.S" + +#else /* IS_IN (libc) */ +/* Build a versioned object for libc. 
*/ +versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) +# define __sigsetjmp_symbol __vmx__sigsetjmp +# define __sigjmp_save_symbol __vmx__sigjmp_save +# include "setjmp-common.S" +libc_hidden_ver (__vmx__sigsetjmp, __sigsetjmp) + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef __sigsetjmp_symbol +# undef __sigjmp_save_symbol +# undef JB_SIZE +compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0) +# define __sigsetjmp_symbol __novmx__sigsetjmp +# define __sigjmp_save_symbol __novmx__sigjmp_save +# include "setjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest0.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest0.S new file mode 100644 index 0000000000..a379e0248e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest0.S @@ -0,0 +1,69 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + General Purpose Register (GPR) restore routine + when Floating Point Registers (FPRs) are not saved + + Note: This restore routine must not be called when GPR30 or + GPR31, or both, are the only registers beings saved. In these + cases, the saving and restoring must be done inline. 
+*/
+
+#include <sysdep.h>
+
+/* Millicode: each _restgpr0_N entry point falls through, restoring GPRs
+   N..31 from the fixed slots 4*(N-32) bytes below r1; the _restgpr0_29
+   entry additionally reloads the return address the matching _savegpr0
+   routine stored at 8(r1) and moves it into LR before returning.  */
+ENTRY(_restgpr0_all)
+	.globl C_TEXT(_restgpr0_13)
+C_TEXT(_restgpr0_13):	lwz	r13,-76(r1)
+	.globl C_TEXT(_restgpr0_14)
+C_TEXT(_restgpr0_14):	lwz	r14,-72(r1)
+	.globl C_TEXT(_restgpr0_15)
+C_TEXT(_restgpr0_15):	lwz	r15,-68(r1)
+	.globl C_TEXT(_restgpr0_16)
+C_TEXT(_restgpr0_16):	lwz	r16,-64(r1)
+	.globl C_TEXT(_restgpr0_17)
+C_TEXT(_restgpr0_17):	lwz	r17,-60(r1)
+	.globl C_TEXT(_restgpr0_18)
+C_TEXT(_restgpr0_18):	lwz	r18,-56(r1)
+	.globl C_TEXT(_restgpr0_19)
+C_TEXT(_restgpr0_19):	lwz	r19,-52(r1)
+	.globl C_TEXT(_restgpr0_20)
+C_TEXT(_restgpr0_20):	lwz	r20,-48(r1)
+	.globl C_TEXT(_restgpr0_21)
+C_TEXT(_restgpr0_21):	lwz	r21,-44(r1)
+	.globl C_TEXT(_restgpr0_22)
+C_TEXT(_restgpr0_22):	lwz	r22,-40(r1)
+	.globl C_TEXT(_restgpr0_23)
+C_TEXT(_restgpr0_23):	lwz	r23,-36(r1)
+	.globl C_TEXT(_restgpr0_24)
+C_TEXT(_restgpr0_24):	lwz	r24,-32(r1)
+	.globl C_TEXT(_restgpr0_25)
+C_TEXT(_restgpr0_25):	lwz	r25,-28(r1)
+	.globl C_TEXT(_restgpr0_26)
+C_TEXT(_restgpr0_26):	lwz	r26,-24(r1)
+	.globl C_TEXT(_restgpr0_27)
+C_TEXT(_restgpr0_27):	lwz	r27,-20(r1)
+	.globl C_TEXT(_restgpr0_28)
+C_TEXT(_restgpr0_28):	lwz	r28,-16(r1)
+	.globl C_TEXT(_restgpr0_29)
+C_TEXT(_restgpr0_29):	lwz	r0,8(r1)	#get return address from frame
+	lwz	r29,-12(r1)	#restore r29
+	mtlr	r0		#move return address to LR
+	lwz	r30,-8(r1)	#restore r30
+	lwz	r31,-4(r1)	#restore r31
+	blr			#return
+END (_restgpr0_all)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest1.S
new file mode 100644
index 0000000000..c3ac120ca5
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/gprrest1.S
@@ -0,0 +1,63 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+   General Purpose Register (GPR) restore routine
+   when Floating Point Registers (FPRs) are saved
+
+   Each _restgpr1_N entry point falls through, restoring GPRs N..31
+   from the slots just below the address in r12 (this FPR-save variant
+   addresses through r12 rather than r1 and does not touch LR).
+*/
+
+#include <sysdep.h>
+
+ENTRY(_restgpr1_all)
+	.globl C_TEXT(_restgpr1_13)
+C_TEXT(_restgpr1_13):	lwz	r13,-76(r12)
+	.globl C_TEXT(_restgpr1_14)
+C_TEXT(_restgpr1_14):	lwz	r14,-72(r12)
+	.globl C_TEXT(_restgpr1_15)
+C_TEXT(_restgpr1_15):	lwz	r15,-68(r12)
+	.globl C_TEXT(_restgpr1_16)
+C_TEXT(_restgpr1_16):	lwz	r16,-64(r12)
+	.globl C_TEXT(_restgpr1_17)
+C_TEXT(_restgpr1_17):	lwz	r17,-60(r12)
+	.globl C_TEXT(_restgpr1_18)
+C_TEXT(_restgpr1_18):	lwz	r18,-56(r12)
+	.globl C_TEXT(_restgpr1_19)
+C_TEXT(_restgpr1_19):	lwz	r19,-52(r12)
+	.globl C_TEXT(_restgpr1_20)
+C_TEXT(_restgpr1_20):	lwz	r20,-48(r12)
+	.globl C_TEXT(_restgpr1_21)
+C_TEXT(_restgpr1_21):	lwz	r21,-44(r12)
+	.globl C_TEXT(_restgpr1_22)
+C_TEXT(_restgpr1_22):	lwz	r22,-40(r12)
+	.globl C_TEXT(_restgpr1_23)
+C_TEXT(_restgpr1_23):	lwz	r23,-36(r12)
+	.globl C_TEXT(_restgpr1_24)
+C_TEXT(_restgpr1_24):	lwz	r24,-32(r12)
+	.globl C_TEXT(_restgpr1_25)
+C_TEXT(_restgpr1_25):	lwz	r25,-28(r12)
+	.globl C_TEXT(_restgpr1_26)
+C_TEXT(_restgpr1_26):	lwz	r26,-24(r12)
+	.globl C_TEXT(_restgpr1_27)
+C_TEXT(_restgpr1_27):	lwz	r27,-20(r12)
+	.globl C_TEXT(_restgpr1_28)
+C_TEXT(_restgpr1_28):	lwz	r28,-16(r12)
+	.globl C_TEXT(_restgpr1_29)
+C_TEXT(_restgpr1_29):	lwz	r29,-12(r12)	#restore r29
+	lwz	r30,-8(r12)	#restore r30
+	lwz	r31,-4(r12)	#restore r31
+	blr			#return
+END (_restgpr1_all)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave0.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave0.S
new file mode 100644
index 0000000000..4c474514c6
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave0.S
@@ -0,0 +1,87 @@
+/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+   General Purpose Register (GPR) save routine
+   when Floating Point Registers (FPRs) are not saved
+
+   Note: This save routine must not be called when GPR30 or
+   GPR31, or both, are the only registers being saved. In these
+   cases, the saving and restoring must be done inline.
+*/ + +#include <sysdep.h> + +ENTRY(_savegpr0_all) + .globl C_TEXT(_savegpr0_13) +C_TEXT(_savegpr0_13): stw r13,-76(r1) + cfi_offset(r13,-76) + .globl C_TEXT(_savegpr0_14) +C_TEXT(_savegpr0_14): stw r14,-72(r1) + cfi_offset(r14,-72) + .globl C_TEXT(_savegpr0_15) +C_TEXT(_savegpr0_15): stw r15,-68(r1) + cfi_offset(r15,-68) + .globl C_TEXT(_savegpr0_16) +C_TEXT(_savegpr0_16): stw r16,-64(r1) + cfi_offset(r16,-64) + .globl C_TEXT(_savegpr0_17) +C_TEXT(_savegpr0_17): stw r17,-60(r1) + cfi_offset(r17,-60) + .globl C_TEXT(_savegpr0_18) +C_TEXT(_savegpr0_18): stw r18,-56(r1) + cfi_offset(r18,-56) + .globl C_TEXT(_savegpr0_19) +C_TEXT(_savegpr0_19): stw r19,-52(r1) + cfi_offset(r19,-52) + .globl C_TEXT(_savegpr0_20) +C_TEXT(_savegpr0_20): stw r20,-48(r1) + cfi_offset(r20,-48) + .globl C_TEXT(_savegpr0_21) +C_TEXT(_savegpr0_21): stw r21,-44(r1) + cfi_offset(r21,-44) + .globl C_TEXT(_savegpr0_22) +C_TEXT(_savegpr0_22): stw r22,-40(r1) + cfi_offset(r22,-40) + .globl C_TEXT(_savegpr0_23) +C_TEXT(_savegpr0_23): stw r23,-36(r1) + cfi_offset(r23,-36) + .globl C_TEXT(_savegpr0_24) +C_TEXT(_savegpr0_24): stw r24,-32(r1) + cfi_offset(r24,-32) + .globl C_TEXT(_savegpr0_25) +C_TEXT(_savegpr0_25): stw r25,-28(r1) + cfi_offset(r25,-28) + .globl C_TEXT(_savegpr0_26) +C_TEXT(_savegpr0_26): stw r26,-24(r1) + cfi_offset(r26,-24) + .globl C_TEXT(_savegpr0_27) +C_TEXT(_savegpr0_27): stw r27,-20(r1) + cfi_offset(r27,-20) + .globl C_TEXT(_savegpr0_28) +C_TEXT(_savegpr0_28): stw r28,-16(r1) + cfi_offset(r28,-16) + .globl C_TEXT(_savegpr0_29) +C_TEXT(_savegpr0_29): stw r29,-12(r1) #save r29 + stw r30,-8(r1) #save r30 + stw r31,-4(r1) #save r31 + cfi_offset(r29,-12) + cfi_offset(r30,-8) + cfi_offset(r31,-4) + stw r0,8(r1) #save LR in callers frame + blr #return +END (_savegpr0_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave1.S new file mode 100644 index 0000000000..89da657070 --- /dev/null +++ 
b/REORG.TODO/sysdeps/powerpc/powerpc32/gprsave1.S @@ -0,0 +1,63 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + General Purpose Register (GPR) save routine + when Floating Point Registers (FPRs) are saved +*/ + +#include <sysdep.h> + +ENTRY(_savegpr1_all) + .globl C_TEXT(_savegpr1_13) +C_TEXT(_savegpr1_13): stw r13,-76(r12) + .globl C_TEXT(_savegpr1_14) +C_TEXT(_savegpr1_14): stw r14,-72(r12) + .globl C_TEXT(_savegpr1_15) +C_TEXT(_savegpr1_15): stw r15,-68(r12) + .globl C_TEXT(_savegpr1_16) +C_TEXT(_savegpr1_16): stw r16,-64(r12) + .globl C_TEXT(_savegpr1_17) +C_TEXT(_savegpr1_17): stw r17,-60(r12) + .globl C_TEXT(_savegpr1_18) +C_TEXT(_savegpr1_18): stw r18,-56(r12) + .globl C_TEXT(_savegpr1_19) +C_TEXT(_savegpr1_19): stw r19,-52(r12) + .globl C_TEXT(_savegpr1_20) +C_TEXT(_savegpr1_20): stw r20,-48(r12) + .globl C_TEXT(_savegpr1_21) +C_TEXT(_savegpr1_21): stw r21,-44(r12) + .globl C_TEXT(_savegpr1_22) +C_TEXT(_savegpr1_22): stw r22,-40(r12) + .globl C_TEXT(_savegpr1_23) +C_TEXT(_savegpr1_23): stw r23,-36(r12) + .globl C_TEXT(_savegpr1_24) +C_TEXT(_savegpr1_24): stw r24,-32(r12) + .globl C_TEXT(_savegpr1_25) +C_TEXT(_savegpr1_25): stw r25,-28(r12) + .globl C_TEXT(_savegpr1_26) +C_TEXT(_savegpr1_26): stw r26,-24(r12) + .globl 
C_TEXT(_savegpr1_27) +C_TEXT(_savegpr1_27): stw r27,-20(r12) + .globl C_TEXT(_savegpr1_28) +C_TEXT(_savegpr1_28): stw r28,-16(r12) + .globl C_TEXT(_savegpr1_29) +C_TEXT(_savegpr1_29): stw r29,-12(r12) #save r29 + stw r30,-8(r12) #save r30 + stw r31,-4(r12) #save r31 + blr #return +END (_savegpr1_all) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/libgcc-compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/libgcc-compat.S new file mode 100644 index 0000000000..431ccaa4b3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/libgcc-compat.S @@ -0,0 +1,137 @@ +/* pre-.hidden libgcc compatibility + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + + .file "libgcc-compat.S" + +#include <shlib-compat.h> + +#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_2_6) + + .symver __ashldi3_v_glibc20,__ashldi3@GLIBC_2.0 + .symver __ashrdi3_v_glibc20,__ashrdi3@GLIBC_2.0 + .symver __lshrdi3_v_glibc20,__lshrdi3@GLIBC_2.0 + .symver __cmpdi2_v_glibc20,__cmpdi2@GLIBC_2.0 + .symver __ucmpdi2_v_glibc20,__ucmpdi2@GLIBC_2.0 +#if !defined _SOFT_FLOAT && !defined __NO_FPRS__ + .symver __fixdfdi_v_glibc20,__fixdfdi@GLIBC_2.0 + .symver __fixunsdfdi_v_glibc20,__fixunsdfdi@GLIBC_2.0 + .symver __fixsfdi_v_glibc20,__fixsfdi@GLIBC_2.0 + .symver __fixunssfdi_v_glibc20,__fixunssfdi@GLIBC_2.0 + .symver __floatdidf_v_glibc20,__floatdidf@GLIBC_2.0 + .symver __floatdisf_v_glibc20,__floatdisf@GLIBC_2.0 +#endif + +#ifdef HAVE_DOT_HIDDEN + .hidden __ashldi3 + .hidden __ashrdi3 + .hidden __lshrdi3 + .hidden __cmpdi2 + .hidden __ucmpdi2 +# if !defined _SOFT_FLOAT && !defined __NO_FPRS__ + .hidden __fixdfdi + .hidden __fixsfdi + .hidden __fixunsdfdi + .hidden __fixunssfdi + .hidden __floatdidf + .hidden __floatdisf +# endif +#endif + + .section ".text" + + .align 2 + .globl __ashldi3_v_glibc20 + .type __ashldi3_v_glibc20,@function +__ashldi3_v_glibc20: + b __ashldi3@local +.Lfe5: + .size __ashldi3_v_glibc20,.Lfe5-__ashldi3_v_glibc20 + .align 2 + .globl __ashrdi3_v_glibc20 + .type __ashrdi3_v_glibc20,@function +__ashrdi3_v_glibc20: + b __ashrdi3@local +.Lfe6: + .size __ashrdi3_v_glibc20,.Lfe6-__ashrdi3_v_glibc20 + .align 2 + .globl __lshrdi3_v_glibc20 + .type __lshrdi3_v_glibc20,@function +__lshrdi3_v_glibc20: + b __lshrdi3@local +.Lfe7: + .size __lshrdi3_v_glibc20,.Lfe7-__lshrdi3_v_glibc20 + .align 2 + .globl __cmpdi2_v_glibc20 + .type __cmpdi2_v_glibc20,@function +__cmpdi2_v_glibc20: + b __cmpdi2@local +.Lfe8: + .size __cmpdi2_v_glibc20,.Lfe8-__cmpdi2_v_glibc20 + .align 2 + .globl __ucmpdi2_v_glibc20 + .type __ucmpdi2_v_glibc20,@function +__ucmpdi2_v_glibc20: + b __ucmpdi2@local +.Lfe9: + .size __ucmpdi2_v_glibc20,.Lfe9-__ucmpdi2_v_glibc20 +#if 
!defined _SOFT_FLOAT && !defined __NO_FPRS__ + .align 2 + .globl __fixdfdi_v_glibc20 + .type __fixdfdi_v_glibc20,@function +__fixdfdi_v_glibc20: + b __fixdfdi@local +.Lfe10: + .size __fixdfdi_v_glibc20,.Lfe10-__fixdfdi_v_glibc20 + .align 2 + .globl __fixunsdfdi_v_glibc20 + .type __fixunsdfdi_v_glibc20,@function +__fixunsdfdi_v_glibc20: + b __fixunsdfdi@local +.Lfe11: + .size __fixunsdfdi_v_glibc20,.Lfe11-__fixunsdfdi_v_glibc20 + .align 2 + .globl __fixsfdi_v_glibc20 + .type __fixsfdi_v_glibc20,@function +__fixsfdi_v_glibc20: + b __fixsfdi@local +.Lfe12: + .size __fixsfdi_v_glibc20,.Lfe12-__fixsfdi_v_glibc20 + .align 2 + .globl __fixunssfdi_v_glibc20 + .type __fixunssfdi_v_glibc20,@function +__fixunssfdi_v_glibc20: + b __fixunssfdi@local +.Lfe13: + .size __fixunssfdi_v_glibc20,.Lfe13-__fixunssfdi_v_glibc20 + .align 2 + .globl __floatdidf_v_glibc20 + .type __floatdidf_v_glibc20,@function +__floatdidf_v_glibc20: + b __floatdidf@local +.Lfe14: + .size __floatdidf_v_glibc20,.Lfe14-__floatdidf_v_glibc20 + .align 2 + .globl __floatdisf_v_glibc20 + .type __floatdisf_v_glibc20,@function +__floatdisf_v_glibc20: + b __floatdisf@local +.Lfe15: + .size __floatdisf_v_glibc20,.Lfe15-__floatdisf_v_glibc20 +#endif + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/lshift.S b/REORG.TODO/sysdeps/powerpc/powerpc32/lshift.S new file mode 100644 index 0000000000..78c151f101 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/lshift.S @@ -0,0 +1,125 @@ +/* Shift a limb left, low level routine. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t usize, + unsigned int cnt) */ + +EALIGN (__mpn_lshift, 3, 0) + + mtctr r5 # copy size into CTR + cmplwi cr0,r5,16 # is size < 16 + slwi r0,r5,2 + add r7,r3,r0 # make r7 point at end of res + add r4,r4,r0 # make r4 point at end of s1 + lwzu r11,-4(r4) # load first s1 limb + subfic r8,r6,32 + srw r3,r11,r8 # compute function return value + bge cr0,L(big) # branch if size >= 16 + + bdz L(end1) + +L(0): lwzu r10,-4(r4) + slw r9,r11,r6 + srw r12,r10,r8 + or r9,r9,r12 + stwu r9,-4(r7) + bdz L(end2) + lwzu r11,-4(r4) + slw r9,r10,r6 + srw r12,r11,r8 + or r9,r9,r12 + stwu r9,-4(r7) + bdnz L(0) + +L(end1):slw r0,r11,r6 + stw r0,-4(r7) + blr + + +/* Guaranteed not to succeed. */ +L(boom): tweq r0,r0 + +/* We imitate a case statement, by using (yuk!) fixed-length code chunks, + of size 4*12 bytes. We have to do this (or something) to make this PIC. */ +L(big): mflr r9 + cfi_register(lr,r9) + bltl- cr0,L(boom) # Never taken, only used to set LR. 
+ slwi r10,r6,4 + mflr r12 + add r10,r12,r10 + slwi r8,r6,5 + add r10,r8,r10 + mtctr r10 + addi r5,r5,-1 + mtlr r9 + cfi_same_value (lr) + bctr + +L(end2):slw r0,r10,r6 + stw r0,-4(r7) + blr + +#define DO_LSHIFT(n) \ + mtctr r5; \ +L(n): lwzu r10,-4(r4); \ + slwi r9,r11,n; \ + inslwi r9,r10,n,32-n; \ + stwu r9,-4(r7); \ + bdz- L(end2); \ + lwzu r11,-4(r4); \ + slwi r9,r10,n; \ + inslwi r9,r11,n,32-n; \ + stwu r9,-4(r7); \ + bdnz L(n); \ + b L(end1) + + DO_LSHIFT(1) + DO_LSHIFT(2) + DO_LSHIFT(3) + DO_LSHIFT(4) + DO_LSHIFT(5) + DO_LSHIFT(6) + DO_LSHIFT(7) + DO_LSHIFT(8) + DO_LSHIFT(9) + DO_LSHIFT(10) + DO_LSHIFT(11) + DO_LSHIFT(12) + DO_LSHIFT(13) + DO_LSHIFT(14) + DO_LSHIFT(15) + DO_LSHIFT(16) + DO_LSHIFT(17) + DO_LSHIFT(18) + DO_LSHIFT(19) + DO_LSHIFT(20) + DO_LSHIFT(21) + DO_LSHIFT(22) + DO_LSHIFT(23) + DO_LSHIFT(24) + DO_LSHIFT(25) + DO_LSHIFT(26) + DO_LSHIFT(27) + DO_LSHIFT(28) + DO_LSHIFT(29) + DO_LSHIFT(30) + DO_LSHIFT(31) + +END (__mpn_lshift) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/mcount.c b/REORG.TODO/sysdeps/powerpc/powerpc32/mcount.c new file mode 100644 index 0000000000..d8c063222a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/mcount.c @@ -0,0 +1,17 @@ +#include <shlib-compat.h> + +#define __mcount_internal ___mcount_internal + +#include <gmon/mcount.c> + +#undef __mcount_internal + +/* __mcount_internal was added in glibc 2.15 with version GLIBC_PRIVATE, + but it should have been put in version GLIBC_2.15. Mark the + GLIBC_PRIVATE version obsolete and add it to GLIBC_2.16 instead. 
*/ +versioned_symbol (libc, ___mcount_internal, __mcount_internal, GLIBC_2_16); + +#if SHLIB_COMPAT (libc, GLIBC_2_15, GLIBC_2_16) +strong_alias (___mcount_internal, ___mcount_internal_private); +symbol_version (___mcount_internal_private, __mcount_internal, GLIBC_PRIVATE); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/memset.S new file mode 100644 index 0000000000..8913a02698 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/memset.S @@ -0,0 +1,307 @@ +/* Optimized memset implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in four sizes: byte (8 bits), word (32 bits), + 32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits). + There is a special case for setting whole cache lines to 0, which + takes advantage of the dcbz instruction. 
*/ + + .section ".text" +EALIGN (memset, 5, 1) + +#define rTMP r0 +#define rRTN r3 /* initial value of 1st argument */ +#define rMEMP0 r3 /* original value of 1st arg */ +#define rCHR r4 /* char to set in each byte */ +#define rLEN r5 /* length of region to set */ +#define rMEMP r6 /* address at which we are storing */ +#define rALIGN r7 /* number of bytes we are setting now (when aligning) */ +#define rMEMP2 r8 + +#define rPOS32 r7 /* constant +32 for clearing with dcbz */ +#define rNEG64 r8 /* constant -64 for clearing with dcbz */ +#define rNEG32 r9 /* constant -32 for clearing with dcbz */ + +#define rGOT r9 /* Address of the Global Offset Table. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ + +/* take care of case for size <= 4 */ + cmplwi cr1, rLEN, 4 + andi. rALIGN, rMEMP0, 3 + mr rMEMP, rMEMP0 + ble- cr1, L(small) +/* align to word boundary */ + cmplwi cr5, rLEN, 31 + rlwimi rCHR, rCHR, 8, 16, 23 + beq+ L(aligned) /* 8th instruction from .align */ + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): sth rCHR, -2(rMEMP) /* 16th instruction from .align */ +/* take care of case for size < 31 */ +L(aligned): + mtcrf 0x01, rLEN + rlwimi rCHR, rCHR, 16, 0, 15 + ble cr5, L(medium) +/* align to cache line boundary... */ + andi. rALIGN, rMEMP, 0x1C + subfic rALIGN, rALIGN, 0x20 + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stw rCHR, -4(rMEMP2) + stwu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + stw rCHR, -4(rMEMP2) /* 32nd instruction from .align */ + stw rCHR, -8(rMEMP2) + stw rCHR, -12(rMEMP2) + stwu rCHR, -16(rMEMP2) +L(a2): bf 29, L(caligned) + stw rCHR, -4(rMEMP2) +/* now aligned to a cache line. */ +L(caligned): + cmplwi cr1, rCHR, 0 + clrrwi. 
rALIGN, rLEN, 5 + mtcrf 0x01, rLEN /* 40th instruction from .align */ + +/* Check if we can use the special case for clearing memory using dcbz. + This requires that we know the correct cache line size for this + processor. Getting the __cache_line_size may require establishing GOT + addressability, so branch out of line to set this up. */ + beq cr1, L(checklinesize) + +/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary. + Can't assume that rCHR is zero or that the cache line size is either + 32-bytes or even known. */ +L(nondcbz): + srwi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* we may not actually get to do a full line */ + clrlwi. rLEN, rLEN, 27 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) /* 48th instruction from .align */ + +/* We can't use dcbz here as we don't know the cache line size. We can + use "data cache block touch for store", which is safe. */ +L(c3): dcbtst rNEG64, rMEMP + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) + nop /* let 601 fetch last 4 instructions of loop */ + stw rCHR, -20(rMEMP) + stw rCHR, -24(rMEMP) /* 56th instruction from .align */ + nop /* let 601 fetch first 8 instructions of loop */ + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) /* 64th instruction from .align */ + stw rCHR, -20(rMEMP) + cmplwi cr1, rLEN, 16 + stw rCHR, -24(rMEMP) + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) /* 72nd instruction from .align */ + + .align 5 + nop +/* Clear cache lines of memory in 128-byte chunks. + This code is optimized for processors with 32-byte cache lines. + It is further optimized for the 601 processor, which requires + some care in how the code is aligned in the i-cache. */ +L(zloopstart): + clrlwi rLEN, rLEN, 27 + mtcrf 0x02, rALIGN + srwi. 
rTMP, rALIGN, 7 + mtctr rTMP + li rPOS32, 0x20 + li rNEG64, -0x40 + cmplwi cr1, rLEN, 16 /* 8 */ + bf 26, L(z0) + dcbz 0, rMEMP + addi rMEMP, rMEMP, 0x20 +L(z0): li rNEG32, -0x20 + bf 25, L(z1) + dcbz 0, rMEMP + dcbz rPOS32, rMEMP + addi rMEMP, rMEMP, 0x40 /* 16 */ +L(z1): cmplwi cr5, rLEN, 0 + beq L(medium) +L(zloop): + dcbz 0, rMEMP + dcbz rPOS32, rMEMP + addi rMEMP, rMEMP, 0x80 + dcbz rNEG64, rMEMP + dcbz rNEG32, rMEMP + bdnz L(zloop) + beqlr cr5 + b L(medium_tail2) + + .align 5 +L(small): +/* Memset of 4 bytes or less. */ + cmplwi cr5, rLEN, 1 + cmplwi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + nop + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + nop + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + cmplwi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + stw rCHR, -4(rMEMP) /* 8th instruction from .align */ + stw rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) /* 16th instruction from .align */ +L(medium_27t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stwu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr + +L(checklinesize): +#ifdef SHARED + mflr rTMP +/* If the remaining length is less the 32 bytes then don't bother getting + the cache line size. */ + beq L(medium) +/* Establishes GOT addressability so we can load __cache_line_size + from static. This value was set from the aux vector during startup. 
*/ + SETUP_GOT_ACCESS(rGOT,got_label) + addis rGOT,rGOT,__cache_line_size-got_label@ha + lwz rCLS,__cache_line_size-got_label@l(rGOT) + mtlr rTMP +#else +/* Load __cache_line_size from static. This value was set from the + aux vector during startup. */ + lis rCLS,__cache_line_size@ha +/* If the remaining length is less the 32 bytes then don't bother getting + the cache line size. */ + beq L(medium) + lwz rCLS,__cache_line_size@l(rCLS) +#endif + +/* If the cache line size was not set then goto to L(nondcbz), which is + safe for any cache line size. */ + cmplwi cr1,rCLS,0 + beq cr1,L(nondcbz) + +/* If the cache line size is 32 bytes then goto to L(zloopstart), + which is coded specifically for 32-byte lines (and 601). */ + cmplwi cr1,rCLS,32 + beq cr1,L(zloopstart) + +/* Now we know the cache line size and it is not 32-bytes. However + we may not yet be aligned to the cache line and may have a partial + line to fill. Touch it 1st to fetch the cache line. */ + dcbtst 0,rMEMP + + addi rCLM,rCLS,-1 +L(getCacheAligned): + cmplwi cr1,rLEN,32 + and. rTMP,rCLM,rMEMP + blt cr1,L(handletail32) + beq L(cacheAligned) +/* We are not aligned to start of a cache line yet. Store 32-byte + of data and test again. */ + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + stw rCHR,-32(rMEMP) + stw rCHR,-28(rMEMP) + stw rCHR,-24(rMEMP) + stw rCHR,-20(rMEMP) + stw rCHR,-16(rMEMP) + stw rCHR,-12(rMEMP) + stw rCHR,-8(rMEMP) + stw rCHR,-4(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ +L(cacheAligned): + cmplw cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because; the cache line size was set, it was not + 32-bytes, and the remainder (rLEN) is now less than the actual cache + line size. Set up the preconditions for L(nondcbz) and go there to + store the remaining bytes. */ +L(handletail32): + clrrwi. 
rALIGN, rLEN, 5 + b L(nondcbz) + +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/mul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/mul_1.S new file mode 100644 index 0000000000..c3093e2bc6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/mul_1.S @@ -0,0 +1,45 @@ +/* Multiply a limb vector by a limb, for PowerPC. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate s1*s2 and put result in res_ptr; return carry. 
*/ + +ENTRY (__mpn_mul_1) + mtctr r5 + + lwz r0,0(r4) + mullw r7,r0,r6 + mulhwu r10,r0,r6 + addi r3,r3,-4 # adjust res_ptr + addic r5,r5,0 # clear cy with dummy insn + bdz L(1) + +L(0): lwzu r0,4(r4) + stwu r7,4(r3) + mullw r8,r0,r6 + adde r7,r8,r10 + mulhwu r10,r0,r6 + bdnz L(0) + +L(1): stw r7,4(r3) + addze r3,r10 + blr +END (__mpn_mul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Implies new file mode 100644 index 0000000000..a372141bb7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Implies @@ -0,0 +1,2 @@ +powerpc/power4/fpu +powerpc/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Makefile new file mode 100644 index 0000000000..ba06adb5d0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/Makefile @@ -0,0 +1,6 @@ +# Makefile fragment for POWER4/5/5+. + +ifeq ($(subdir),string) +CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile new file mode 100644 index 0000000000..5afbade15f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile @@ -0,0 +1,43 @@ +ifeq ($(subdir),math) +sysdep_routines += s_isnan-power7 s_isnan-power6 s_isnan-power5 s_isnan-ppc32 \ + s_isnanf-power6 s_isnanf-power5 s_isinf-power7 \ + s_isinf-ppc32 s_isinff-ppc32 s_finite-power7 \ + s_finite-ppc32 s_finitef-ppc32 s_copysign-power6 \ + s_copysign-ppc32 s_modf-power5+ s_modf-ppc32 \ + s_modff-power5+ s_modff-ppc32 + +libm-sysdep_routines += s_llrintf-power6 s_llrintf-ppc32 s_llrint-power6 \ + s_llrint-ppc32 s_llround-power6 s_llround-power5+ \ + 
s_llround-ppc32 s_isnan-power7 \ + w_sqrt_compat-power5 w_sqrt_compat-ppc32 \ + w_sqrtf_compat-power5 w_sqrtf_compat-ppc32 \ + s_isnan-power6 s_isnan-power5 s_isnan-ppc32 \ + s_isnanf-power6 s_isnanf-power5 s_isinf-power7 \ + s_isinf-ppc32 s_isinff-ppc32 s_finite-power7 \ + s_finite-ppc32 s_finitef-ppc32 s_ceil-power5+ \ + s_ceil-ppc32 s_ceilf-power5+ s_ceilf-ppc32 \ + s_floor-power5+ s_floor-ppc32 s_floorf-power5+ \ + s_floorf-ppc32 s_round-power5+ s_round-ppc32 \ + s_roundf-power5+ s_roundf-ppc32 s_trunc-power5+ \ + s_trunc-ppc32 s_truncf-power5+ s_truncf-ppc32 \ + s_copysign-power6 s_copysign-ppc32 s_lround-power6x \ + s_lround-power5+ s_lround-ppc32 s_lrint-power6x \ + s_lrint-ppc32 s_modf-power5+ s_modf-ppc32 \ + s_modff-power5+ s_modff-ppc32 s_logbl-power7 \ + s_logbl-ppc32 s_logb-power7 s_logb-ppc32 \ + s_logbf-power7 s_logbf-ppc32 e_hypot-power7 \ + e_hypot-ppc32 e_hypotf-power7 e_hypotf-ppc32 + +CFLAGS-s_modf-power5+.c = -mcpu=power5+ +CFLAGS-s_modff-power5+.c = -mcpu=power5+ +CFLAGS-s_logbl-power7.c = -mcpu=power7 +CFLAGS-s_logb-power7.c = -mcpu=power7 +CFLAGS-s_logbf-power7.c = -mcpu=power7 +CFLAGS-e_hypot-power7.c = -mcpu=power7 +CFLAGS-e_hypotf-power7.c = -mcpu=power7 + +# These files quiet sNaNs in a way that is optimized away without +# -fsignaling-nans. +CFLAGS-s_modf-ppc32.c += -fsignaling-nans +CFLAGS-s_modff-ppc32.c += -fsignaling-nans +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c new file mode 100644 index 0000000000..d62b7c4f7b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypot __ieee754_hypot_power7 + +#include <sysdeps/powerpc/fpu/e_hypot.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-ppc32.c new file mode 100644 index 0000000000..25984b724c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-ppc32.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypot __ieee754_hypot_ppc32 + +#include <sysdeps/powerpc/fpu/e_hypot.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot.c new file mode 100644 index 0000000000..5b8a06b4d1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypot. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc32 attribute_hidden; +extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypot, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__ieee754_hypot_power7 + : __ieee754_hypot_ppc32); + +strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c new file mode 100644 index 0000000000..f52bc635d1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c @@ -0,0 +1,26 @@ +/* __ieee754_hypot POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypotf __ieee754_hypotf_power7 + +#include <sysdeps/powerpc/fpu/e_hypotf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-ppc32.c new file mode 100644 index 0000000000..d9f86163c9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-ppc32.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypotf __ieee754_hypotf_ppc32 + +#include <sysdeps/ieee754/flt-32/e_hypotf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf.c new file mode 100644 index 0000000000..d1fc0ce532 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypotf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc32 attribute_hidden; +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypotf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __ieee754_hypotf_power7 + : __ieee754_hypotf_ppc32); + +strong_alias (__ieee754_hypotf, __hypotf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-power5+.S new file mode 100644 index 0000000000..670d6bbffb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-power5+.S @@ -0,0 +1,33 @@ +/* ceil function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __ceil __ceil_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-ppc32.S new file mode 100644 index 0000000000..77d43c5de7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil-ppc32.S @@ -0,0 +1,31 @@ +/* ceil function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __ceil __ceil_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil.c new file mode 100644 index 0000000000..4e3d980ce6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceil.c @@ -0,0 +1,40 @@ +/* Multiple versions of ceil. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceil) __ceil_ppc32 attribute_hidden; +extern __typeof (__ceil) __ceil_power5plus attribute_hidden; + +libc_ifunc (__ceil, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__ceil_power5plus + : __ceil_ppc32); + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +strong_alias (__ceil, __ceill) +weak_alias (__ceil, ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-power5+.S new file mode 100644 index 0000000000..089261460e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-power5+.S @@ -0,0 +1,26 @@ +/* ceilf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __ceilf __ceilf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-ppc32.S new file mode 100644 index 0000000000..c783919f3a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf-ppc32.S @@ -0,0 +1,27 @@ +/* ceilf function. PowerPC32 default version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __ceilf __ceilf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf.c new file mode 100644 index 0000000000..9674001caa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_ceilf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ceilf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceilf) __ceilf_ppc32 attribute_hidden; +extern __typeof (__ceilf) __ceilf_power5plus attribute_hidden; + +libc_ifunc (__ceilf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __ceilf_power5plus + : __ceilf_ppc32); + +weak_alias (__ceilf, ceilf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-power6.S new file mode 100644 index 0000000000..1f58420763 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-power6.S @@ -0,0 +1,33 @@ +/* copysign(). PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __copysign __copysign_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-ppc32.S new file mode 100644 index 0000000000..5d46f0379a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign-ppc32.S @@ -0,0 +1,34 @@ +/* copysign(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) + +#define __copysign __copysign_ppc32 +#undef hidden_def +#define hidden_def(name) + strong_alias (__copysign_ppc32, __GI___copysign) + +#include <sysdeps/powerpc/powerpc32/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign.c new file mode 100644 index 0000000000..bddc1ab3c5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysign.c @@ -0,0 +1,51 @@ +/* Multiple versions of copysign. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Redefine copysign so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias below. 
*/ +#undef __copysign +#define __copysign __redirect_copysign +#include <math.h> +#include <math_ldbl_opt.h> +#undef __copysign +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__redirect_copysign) __copysign_ppc32 attribute_hidden; +extern __typeof (__redirect_copysign) __copysign_power6 attribute_hidden; + +extern __typeof (__redirect_copysign) __libm_copysign; +libc_ifunc (__libm_copysign, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc32); + +strong_alias (__libm_copysign, __copysign) +weak_alias (__copysign, copysign) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysignf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysignf.c new file mode 100644 index 0000000000..7709e08968 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_copysignf.c @@ -0,0 +1,32 @@ +/* Multiple versions of copysignf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__copysignf) __copysign_ppc32 attribute_hidden; +extern __typeof (__copysignf) __copysign_power6 attribute_hidden; + +libc_ifunc (__copysignf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc32); + +weak_alias (__copysignf, copysignf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-power7.S new file mode 100644 index 0000000000..eb23f73e6e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-power7.S @@ -0,0 +1,33 @@ +/* finite(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __finite __finite_power7 + +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-ppc32.c new file mode 100644 index 0000000000..495dd36d0b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite-ppc32.c @@ -0,0 +1,33 @@ +/* finite(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define FINITE __finite_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finite_ppc32, __GI___finite, __finite_ppc32); +#endif + +#include <sysdeps/ieee754/dbl-64/s_finite.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite.c new file mode 100644 index 0000000000..d3d0755bc0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finite.c @@ -0,0 +1,57 @@ +/* Multiple versions of finite. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finite __redirect___finite +#define __finitef __redirect___finitef +#define __finitel __redirect___finitel +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finite) __finite_ppc32 attribute_hidden; +extern __typeof (__finite) __finite_power7 attribute_hidden; +#undef __finite +#undef __finitef +#undef __finitel + +libc_ifunc_redirected (__redirect___finite, __finite, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__finite_power7 + : __finite_ppc32); + +weak_alias (__finite, finite) + +#ifdef NO_LONG_DOUBLE +strong_alias (__finite, __finitel) +weak_alias (__finite, finitel) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0); +# endif +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __finite, __finitel, GLIBC_2_1); +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef-ppc32.c new file mode 100644 index 0000000000..4a52949066 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef-ppc32.c @@ -0,0 +1,31 @@ +/* finitef(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define FINITEF __finitef_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finitef_ppc32, __GI___finitef, __finitef_ppc32); +#endif + +#include <sysdeps/ieee754/flt-32/s_finitef.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef.c new file mode 100644 index 0000000000..fa214f37ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_finitef.c @@ -0,0 +1,34 @@ +/* Multiple versions of finitef. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finitef __redirect___finitef +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finitef) __finitef_ppc32 attribute_hidden; +/* The power7 finite(double) works for float. */ +extern __typeof (__finitef) __finite_power7 attribute_hidden; +#undef __finitef + +libc_ifunc_redirected (__redirect___finitef, __finitef, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__finite_power7 + : __finitef_ppc32); + +weak_alias (__finitef, finitef) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-power5+.S new file mode 100644 index 0000000000..dfecd1c59e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-power5+.S @@ -0,0 +1,33 @@ +/* floor function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __floor __floor_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-ppc32.S new file mode 100644 index 0000000000..3af604ca25 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor-ppc32.S @@ -0,0 +1,31 @@ +/* floor function. PowerPC32 default version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __floor __floor_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor.c new file mode 100644 index 0000000000..0da528c922 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floor.c @@ -0,0 +1,40 @@ +/* Multiple versions of floor. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floor) __floor_ppc32 attribute_hidden; +extern __typeof (__floor) __floor_power5plus attribute_hidden; + +libc_ifunc (__floor, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floor_power5plus + : __floor_ppc32); + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +strong_alias (__floor, __floorl) +weak_alias (__floor, floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-power5+.S new file mode 100644 index 0000000000..fd2fa331bb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-power5+.S @@ -0,0 +1,26 @@ +/* floorf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __floorf __floorf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-ppc32.S new file mode 100644 index 0000000000..55bee8652b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf-ppc32.S @@ -0,0 +1,27 @@ +/* floorf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __floorf __floorf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf.c new file mode 100644 index 0000000000..56375097f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_floorf.c @@ -0,0 +1,32 @@ +/* Multiple versions of floorf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floorf) __floorf_ppc32 attribute_hidden; +extern __typeof (__floorf) __floorf_power5plus attribute_hidden; + +libc_ifunc (__floorf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floorf_power5plus + : __floorf_ppc32); + +weak_alias (__floorf, floorf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-power7.S new file mode 100644 index 0000000000..f7c7510649 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-power7.S @@ -0,0 +1,33 @@ +/* isinf(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __isinf __isinf_power7 + +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-ppc32.c new file mode 100644 index 0000000000..0d1cb75cf3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf-ppc32.c @@ -0,0 +1,33 @@ +/* isinf(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __isinf __isinf_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinf_ppc32, __GI___isinf, __isinf_ppc32); +#endif + +#include <sysdeps/ieee754/dbl-64/s_isinf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf.c new file mode 100644 index 0000000000..c7d7568ce0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinf.c @@ -0,0 +1,50 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinf __redirect___isinf +#define __isinff __redirect___isinff +#define __isinfl __redirect___isinfl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinf) __isinf_ppc32 attribute_hidden; +extern __typeof (__isinf) __isinf_power7 attribute_hidden; +#undef __isinf +#undef __isinff +#undef __isinfl + +libc_ifunc_redirected (__redirect___isinf, __isinf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__isinf_power7 + : __isinf_ppc32); + +weak_alias (__isinf, isinf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff-ppc32.c new file mode 100644 index 0000000000..25fd22d0c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff-ppc32.c @@ -0,0 +1,31 @@ +/* isinff(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __isinff __isinff_ppc32 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinff_ppc32, __GI___isinff, __isinff_ppc32); +#endif + +#include <sysdeps/ieee754/flt-32/s_isinff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff.c new file mode 100644 index 0000000000..fd6e9983f6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isinff.c @@ -0,0 +1,35 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinff __redirect___isinff +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinff) __isinff_ppc32 attribute_hidden; +/* The power7 isinf(double) works for float. */ +extern __typeof (__isinff) __isinf_power7 attribute_hidden; +#undef __isinff + +libc_ifunc_redirected (__redirect___isinff, __isinff, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__isinf_power7 + : __isinff_ppc32); + +weak_alias (__isinff, isinff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power5.S new file mode 100644 index 0000000000..36d6709ab0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power5.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power6.S new file mode 100644 index 0000000000..0ee970330c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power6.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC32/POWER6 version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power7.S new file mode 100644 index 0000000000..24d5a21d73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-power7.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power7 + +#include <sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-ppc32.S new file mode 100644 index 0000000000..175229edd6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan-ppc32.S @@ -0,0 +1,32 @@ +/* isnan(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) + +#define __isnan __isnan_ppc32 +#undef hidden_def +#define hidden_def(name) + strong_alias (__isnan_ppc32, __GI___isnan) + +#include <sysdeps/powerpc/powerpc32/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan.c new file mode 100644 index 0000000000..79447af535 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnan.c @@ -0,0 +1,56 @@ +/* Multiple versions of isnan. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __isnan __redirect___isnan +#define __isnanf __redirect___isnanf +#define __isnanl __redirect___isnanl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isnan) __isnan_ppc32 attribute_hidden; +extern __typeof (__isnan) __isnan_power5 attribute_hidden; +extern __typeof (__isnan) __isnan_power6 attribute_hidden; +extern __typeof (__isnan) __isnan_power7 attribute_hidden; +#undef __isnan +#undef __isnanf +#undef __isnanl + +libc_ifunc_redirected (__redirect___isnan, __isnan, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 + : __isnan_ppc32); + +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power5.S new file mode 100644 index 0000000000..4e57289794 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power5.S @@ -0,0 +1,28 @@ +/* isnanf(). PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#define __isnanf __isnanf_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power6.S new file mode 100644 index 0000000000..40687b5f43 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf-power6.S @@ -0,0 +1,28 @@ +/* isnanf(). PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#define __isnanf __isnanf_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf.c new file mode 100644 index 0000000000..12bdcffcec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_isnanf.c @@ -0,0 +1,39 @@ +/* Multiple versions of isnanf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +/* Both ppc32 and power7 isnan(double) work for float. */ +extern __typeof (__isnanf) __isnan_ppc32 attribute_hidden; +extern __typeof (__isnanf) __isnanf_power5 attribute_hidden; +extern __typeof (__isnanf) __isnanf_power6 attribute_hidden; +extern __typeof (__isnanf) __isnan_power7 attribute_hidden; + +libc_ifunc_hidden (__isnanf, __isnanf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnanf_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? 
__isnanf_power5 + : __isnan_ppc32); + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-power6.S new file mode 100644 index 0000000000..07c0f94a2f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-power6.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC32/Power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-ppc32.S new file mode 100644 index 0000000000..390cd9a8bc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint-ppc32.S @@ -0,0 +1,31 @@ +/* llrint function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint.c new file mode 100644 index 0000000000..88357ebdd9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrint.c @@ -0,0 +1,40 @@ +/* Multiple versions of llrint. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llrint) __llrint_ppc32 attribute_hidden; +extern __typeof (__llrint) __llrint_power6 attribute_hidden; + +libc_ifunc (__llrint, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llrint_power6 + : __llrint_ppc32); + +weak_alias (__llrint, llrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.S new file mode 100644 index 0000000000..8ebbefd3dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-power6.S @@ -0,0 +1,26 @@ +/* Round float to long int. PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __llrintf __llrintf_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.S new file mode 100644 index 0000000000..aa66e1ed9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf-ppc32.S @@ -0,0 +1,26 @@ +/* llrintf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __llrintf __llrintf_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf.c new file mode 100644 index 0000000000..f513e61944 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llrintf.c @@ -0,0 +1,31 @@ +/* Multiple versions of llrintf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llrintf) __llrintf_ppc32 attribute_hidden; +extern __typeof (__llrintf) __llrintf_power6 attribute_hidden; + +libc_ifunc (__llrintf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llrintf_power6 + : __llrintf_ppc32); + +weak_alias (__llrintf, llrintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.S new file mode 100644 index 0000000000..16e3124a3c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.S @@ -0,0 +1,31 @@ +/* lround function. PowerPC32/POWER5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.S new file mode 100644 index 0000000000..508c6b7a29 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.S @@ -0,0 +1,31 @@ +/* lround function. PowerPC32/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_power6 + +#include <sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.S new file mode 100644 index 0000000000..4ecd2a266f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.S @@ -0,0 +1,31 @@ +/* llround function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround.c new file mode 100644 index 0000000000..caf8953c81 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround.c @@ -0,0 +1,43 @@ +/* Multiple versions of llround. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llround) __llround_ppc32 attribute_hidden; +extern __typeof (__llround) __llround_power5plus attribute_hidden; +extern __typeof (__llround) __llround_power6 attribute_hidden; + +libc_ifunc (__llround, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llround_power6 : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__llround_power5plus + : __llround_ppc32); + +weak_alias (__llround, llround) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llround, __llroundl) +weak_alias (__llround, llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llroundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llroundf.c new file mode 100644 index 0000000000..1b7e45653a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llroundf.c @@ -0,0 +1,34 @@ +/* Multiple versions of llroundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__llroundf) __llround_ppc32 attribute_hidden; +extern __typeof (__llroundf) __llround_power5plus attribute_hidden; +extern __typeof (__llroundf) __llround_power6 attribute_hidden; + +libc_ifunc (__llroundf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __llround_power6 : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__llround_power5plus + : __llround_ppc32); + +weak_alias (__llroundf, llroundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c new file mode 100644 index 0000000000..20fd02a5ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c @@ -0,0 +1,31 @@ +/* logb(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __logb __logb_power7 + +#include <sysdeps/powerpc/power7/fpu/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-ppc32.c new file mode 100644 index 0000000000..3920579dbc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-ppc32.c @@ -0,0 +1,28 @@ +/* logb(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __logb __logb_ppc32 + +#include <sysdeps/ieee754/dbl-64/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb.c new file mode 100644 index 0000000000..fddd1ecbec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb.c @@ -0,0 +1,41 @@ +/* Multiple versions of logb. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logb) __logb_ppc32 attribute_hidden; +extern __typeof (__logb) __logb_power7 attribute_hidden; + +libc_ifunc (__logb, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logb_power7 + : __logb_ppc32); + +weak_alias (__logb, logb) + +#ifdef NO_LONG_DOUBLE +strong_alias (__logb, __logbl) +weak_alias (__logb, logbl) +#endif + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, logb, logbl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c new file mode 100644 index 0000000000..6e064bedbf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c @@ -0,0 +1,26 @@ +/* logbf(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __logbf __logbf_power7 + +#include <sysdeps/powerpc/power7/fpu/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-ppc32.c new file mode 100644 index 0000000000..ca9865d784 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-ppc32.c @@ -0,0 +1,26 @@ +/* logbf(). PowerPC32 default implementation. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __logbf __logbf_ppc32 + +#include <sysdeps/ieee754/flt-32/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf.c new file mode 100644 index 0000000000..3b9de174bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbf) __logbf_ppc32 attribute_hidden; +extern __typeof (__logbf) __logbf_power7 attribute_hidden; + +libc_ifunc (__logbf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbf_power7 + : __logbf_ppc32); + +weak_alias (__logbf, logbf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c new file mode 100644 index 0000000000..547664dd4b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c @@ -0,0 +1,21 @@ +/* logbl(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __logbl __logbl_power7 + +#include <sysdeps/powerpc/power7/fpu/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-ppc32.c new file mode 100644 index 0000000000..c4361226dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-ppc32.c @@ -0,0 +1,21 @@ +/* logbl(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __logbl __logbl_ppc32 + +#include <sysdeps/ieee754/ldbl-128ibm/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl.c new file mode 100644 index 0000000000..167e9535cb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbl. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbl) __logbl_ppc32 attribute_hidden; +extern __typeof (__logbl) __logbl_power7 attribute_hidden; + +libc_ifunc (__logbl, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbl_power7 + : __logbl_ppc32); + +long_double_symbol (libm, __logbl, logbl); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-power6x.S new file mode 100644 index 0000000000..3be812e5dc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-power6x.S @@ -0,0 +1,33 @@ +/* Round double to long int. POWER6x PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __lrint __lrint_power6x + +#include <sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-ppc32.S new file mode 100644 index 0000000000..ee5725db03 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint-ppc32.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __lrint __lrint_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_lrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint.c new file mode 100644 index 0000000000..ec7c991464 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrint.c @@ -0,0 +1,40 @@ +/* Multiple versions of lrint. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__lrint) __lrint_ppc32 attribute_hidden; +extern __typeof (__lrint) __lrint_power6x attribute_hidden; + +libc_ifunc (__lrint, + (hwcap & PPC_FEATURE_POWER6_EXT) ? 
+ __lrint_power6x + : __lrint_ppc32); + +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lrint, lrintl) +strong_alias (__lrint, __lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrintf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrintf.c new file mode 100644 index 0000000000..4a7fa4bcfa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lrintf.c @@ -0,0 +1,31 @@ +/* Multiple versions of lrintf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__lrintf) __lrint_ppc32 attribute_hidden; +extern __typeof (__lrintf) __lrint_power6x attribute_hidden; + +libc_ifunc (__lrintf, + (hwcap & PPC_FEATURE_POWER6_EXT) ? 
+ __lrint_power6x + : __lrint_ppc32); + +weak_alias (__lrintf, lrintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.S new file mode 100644 index 0000000000..7aa2364183 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.S @@ -0,0 +1,33 @@ +/* lround function. POWER5+, PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __lround __lround_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power6x.S new file mode 100644 index 0000000000..a9d54d560d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power6x.S @@ -0,0 +1,33 @@ +/* lround function. POWER6x, PowerPC32 version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __lround __lround_power6x + +#include <sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.S new file mode 100644 index 0000000000..78a931238a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.S @@ -0,0 +1,31 @@ +/* lround function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __lround __lround_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_lround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround.c new file mode 100644 index 0000000000..fdc0c3dd8d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround.c @@ -0,0 +1,43 @@ +/* Multiple versions of lround. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__lround) __lround_ppc32 attribute_hidden; +extern __typeof (__lround) __lround_power5plus attribute_hidden; +extern __typeof (__lround) __lround_power6x attribute_hidden; + +libc_ifunc (__lround, + (hwcap & PPC_FEATURE_POWER6_EXT) ? + __lround_power6x + : (hwcap & PPC_FEATURE_POWER5_PLUS) ? + __lround_power5plus + : __lround_ppc32); + +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lroundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lroundf.c new file mode 100644 index 0000000000..ff61dd6ca7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lroundf.c @@ -0,0 +1,34 @@ +/* Multiple versions of lroundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. 
*/ +extern __typeof (__lroundf) __lround_ppc32 attribute_hidden; +extern __typeof (__lroundf) __lround_power5plus attribute_hidden; +extern __typeof (__lroundf) __lround_power6x attribute_hidden; + +libc_ifunc (__lroundf, + (hwcap & PPC_FEATURE_POWER6_EXT) ? + __lround_power6x + : (hwcap & PPC_FEATURE_POWER5_PLUS) ? + __lround_power5plus + : __lround_ppc32); + +weak_alias (__lroundf, lroundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c new file mode 100644 index 0000000000..955b265045 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c @@ -0,0 +1,31 @@ +/* PowerPC/POWER5+ implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __modf __modf_power5plus + +#include <sysdeps/powerpc/power5+/fpu/s_modf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-ppc32.c new file mode 100644 index 0000000000..6561fdf8e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-ppc32.c @@ -0,0 +1,29 @@ +/* PowerPC32 default implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) + +#define __modf __modf_ppc32 + +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf.c new file mode 100644 index 0000000000..537592ab16 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf.c @@ -0,0 +1,44 @@ +/* Multiple versions of modf. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__modf) __modf_ppc32 attribute_hidden; +extern __typeof (__modf) __modf_power5plus attribute_hidden; + +libc_ifunc (__modf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modf_power5plus + : __modf_ppc32); + +weak_alias (__modf, modf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__modf, __modfl) +weak_alias (__modf, modfl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __modf, modfl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __modf, modfl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c new file mode 100644 index 0000000000..f5a12a282a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c @@ -0,0 +1,27 @@ +/* PowerPC/POWER5+ implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __modff __modff_power5plus + +#include <sysdeps/powerpc/power5+/fpu/s_modff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-ppc32.c new file mode 100644 index 0000000000..9b9fa971bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-ppc32.c @@ -0,0 +1,26 @@ +/* PowerPC32 default implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __modff __modff_ppc32 + +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff.c new file mode 100644 index 0000000000..7ae682d124 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff.c @@ -0,0 +1,30 @@ +/* Multiple versions of modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +extern __typeof (__modff) __modff_ppc32 attribute_hidden; +extern __typeof (__modff) __modff_power5plus attribute_hidden; + +libc_ifunc (__modff, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__modff_power5plus + : __modff_ppc32); + +weak_alias (__modff, modff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-power5+.S new file mode 100644 index 0000000000..02ab78b33c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-power5+.S @@ -0,0 +1,33 @@ +/* round function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __round __round_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-ppc32.S new file mode 100644 index 0000000000..b9e5bb6170 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round-ppc32.S @@ -0,0 +1,31 @@ +/* round function. PowerPC32 default version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round.c new file mode 100644 index 0000000000..46102862ac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_round.c @@ -0,0 +1,40 @@ +/* Multiple versions of round. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__round) __round_ppc32 attribute_hidden; +extern __typeof (__round) __round_power5plus attribute_hidden; + +libc_ifunc (__round, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __round_power5plus + : __round_ppc32); + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +strong_alias (__round, __roundl) +weak_alias (__round, roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-power5+.S new file mode 100644 index 0000000000..442af4c1ea --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-power5+.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __roundf __roundf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-ppc32.S new file mode 100644 index 0000000000..abe74e2e1a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf-ppc32.S @@ -0,0 +1,27 @@ +/* roundf function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf.c new file mode 100644 index 0000000000..0a2e6d53cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_roundf.c @@ -0,0 +1,32 @@ +/* Multiple versions of roundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__roundf) __roundf_ppc32 attribute_hidden; +extern __typeof (__roundf) __roundf_power5plus attribute_hidden; + +libc_ifunc (__roundf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __roundf_power5plus + : __roundf_ppc32); + +weak_alias (__roundf, roundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-power5+.S new file mode 100644 index 0000000000..129570ca34 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-power5+.S @@ -0,0 +1,33 @@ +/* trunc function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __trunc __trunc_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-ppc32.S new file mode 100644 index 0000000000..5e74248a9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc-ppc32.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..110e701218 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_trunc.c @@ -0,0 +1,40 @@ +/* Multiple versions of trunc. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__trunc) __trunc_ppc32 attribute_hidden; +extern __typeof (__trunc) __trunc_power5plus attribute_hidden; + +libc_ifunc (__trunc, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__trunc_power5plus + : __trunc_ppc32); + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +strong_alias (__trunc, __truncl) +weak_alias (__trunc, truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-power5+.S new file mode 100644 index 0000000000..57ab878876 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-power5+.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC32/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(name, alias) + +#define __truncf __truncf_power5plus + +#include <sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-ppc32.S new file mode 100644 index 0000000000..4dd0a6021a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf-ppc32.S @@ -0,0 +1,27 @@ +/* truncf function. PowerPC32 default version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_ppc32 + +#include <sysdeps/powerpc/powerpc32/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..ef6e97d000 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_truncf.c @@ -0,0 +1,32 @@ +/* Multiple versions of truncf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__truncf) __truncf_ppc32 attribute_hidden; +extern __typeof (__truncf) __truncf_power5plus attribute_hidden; + +libc_ifunc (__truncf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __truncf_power5plus + : __truncf_ppc32); + +weak_alias (__truncf, truncf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-power5.S new file mode 100644 index 0000000000..7c5a504177 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-power5.S @@ -0,0 +1,31 @@ +/* sqrt function. PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __sqrt __sqrt_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-ppc32.S new file mode 100644 index 0000000000..534e934ac9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat-ppc32.S @@ -0,0 +1,31 @@ +/* sqrt function. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __sqrt __sqrt_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat.c new file mode 100644 index 0000000000..1e1892034e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrt_compat.c @@ -0,0 +1,40 @@ +/* Multiple versions of sqrt. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__sqrt) __sqrt_ppc32 attribute_hidden; +extern __typeof (__sqrt) __sqrt_power5 attribute_hidden; + +libc_ifunc (__sqrt, + (hwcap & PPC_FEATURE_POWER5) + ? 
__sqrt_power5 + : __sqrt_ppc32); + +weak_alias (__sqrt, sqrt) + +#ifdef NO_LONG_DOUBLE +strong_alias (__sqrt, __sqrtl) +weak_alias (__sqrt, sqrtl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-power5.S new file mode 100644 index 0000000000..eacc042850 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-power5.S @@ -0,0 +1,26 @@ +/* sqrtf function. PowerPC32/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __sqrtf __sqrtf_power5 + +#include <sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-ppc32.S new file mode 100644 index 0000000000..72191fc9a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat-ppc32.S @@ -0,0 +1,26 @@ +/* sqrtf function. PowerPC32 default version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __sqrtf __sqrtf_ppc32 + +#include <sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat.c new file mode 100644 index 0000000000..bbab4d4f93 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/multiarch/w_sqrtf_compat.c @@ -0,0 +1,32 @@ +/* Multiple versions of sqrtf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__sqrtf) __sqrtf_ppc32 attribute_hidden; +extern __typeof (__sqrtf) __sqrtf_power5 attribute_hidden; + +libc_ifunc (__sqrtf, + (hwcap & PPC_FEATURE_POWER5) + ? __sqrtf_power5 + : __sqrtf_ppc32); + +weak_alias (__sqrtf, sqrtf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S new file mode 100644 index 0000000000..d16dbb8406 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S @@ -0,0 +1,46 @@ +/* Round double to long int. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long int[r3, r4] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. 
*/ + nop + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrint) + +weak_alias (__llrint, llrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S new file mode 100644 index 0000000000..9c3dd77863 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S @@ -0,0 +1,38 @@ +/* Round float to long int. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* long long int[r3, r4] __llrintf (float x[fp1]) */ +ENTRY (__llrintf) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. 
*/ + nop + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrintf) + +weak_alias (__llrintf, llrintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S new file mode 100644 index 0000000000..24bd533748 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S @@ -0,0 +1,106 @@ +/* llround function. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section .rodata.cst8,"aM",@progbits,8 + .align 3 + .LC0: .long (52+127)<<23 /* 0x1p+52 */ + .long (-1+127)<<23 /* 0.5 */ + + .section ".text" + +/* long [r3] lround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. 
+ + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^52 and 2^53-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^52 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) +#ifdef SHARED + mflr r11 + cfi_register(lr,r11) + SETUP_GOT_ACCESS(r9,got_label) + addis r9,r9,.LC0-got_label@ha + addi r9,r9,.LC0-got_label@l + mtlr r11 + cfi_same_value (lr) + lfs fp9,0(r9) + lfs fp10,4(r9) +#else + lis r9,.LC0@ha + lfs fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */ + lfs fp10,.LC0@l+4(r9) /* Load 0.5 into fpr10. */ +#endif + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fpr12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^52 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^52 > x < -2^52 just convert with no bias. */ + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,8(r1) + nop + nop + nop + lwz r3,8+HIWORD(r1) /* Load return as integer. */ + lwz r4,8+LOWORD(r1) +.Lout: + addi r1,r1,16 + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. 
*/ + li r4,0 + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llround) + +weak_alias (__llround, llround) + +strong_alias (__llround, __llroundf) +weak_alias (__llround, llroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S new file mode 100644 index 0000000000..72d6181541 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S @@ -0,0 +1 @@ +/* __llroundf is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S new file mode 100644 index 0000000000..bb896a33cd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt_compat.S @@ -0,0 +1,108 @@ +/* sqrt function. PowerPC32 version. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [fp1] sqrt (double x [fp1]) + Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). 
+   The fsqrt instruction generates the correct value for all inputs and
+   sets the appropriate floating point exceptions.  Extended checking is
+   only needed to set errno (via __kernel_standard) if the input value
+   is negative.
+
+   The fsqrt will set FPCC and FU (Floating Point Unordered or NaN)
+   to indicate that the input value was negative or NaN.  Use Move to
+   Condition Register from FPSCR to copy the FPCC field to cr1.  The
+   branch on summary overflow transfers control to w_sqrt to process
+   any error conditions.  Otherwise we can return the result directly.
+
+   This part of the function is a leaf routine, so no need to stack a
+   frame or execute prologue/epilogue code.  This means it is safe to
+   transfer directly to w_sqrt as long as the input value (f1) is
+   preserved.  Putting the sqrt result into f2 (double parameter 2)
+   allows passing both the input value and sqrt result into the extended
+   wrapper so there is no need to recompute.
+
+   This tactic avoids the overhead of stacking a frame for the normal
+   (non-error) case.  Until gcc supports prologue shrink-wrapping
+   this is the best we can do.
*/ + + .section ".text" + .machine power4 +EALIGN (__sqrt, 5, 0) + fsqrt fp2,fp1 + mcrfs cr1,4 + bso- cr1,.Lw_sqrt + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrt: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) + cfi_offset(r30,8-16) +#ifdef SHARED + SETUP_GOT_ACCESS(r30,got_label) + addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x) return z; !isnan*/ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 26) */ + fmr fp2,fp1 + fabs fp0,fp1 + li r3,26 + fcmpu cr7,fp1,fp0 + bne- cr7,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 + b .L4 + END (__sqrt) + +weak_alias (__sqrt, sqrt) + +#ifdef NO_LONG_DOUBLE +weak_alias (__sqrt, sqrtl) +strong_alias (__sqrt, __sqrtl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S new file mode 100644 index 0000000000..c304ab5ca2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf_compat.S @@ -0,0 +1,100 @@ +/* sqrtf function. PowerPC32 version. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* float [fp1] sqrtf (float x [fp1])
+   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
+   The fsqrts instruction generates the correct value for all inputs and
+   sets the appropriate floating point exceptions.  Extended checking is
+   only needed to set errno (via __kernel_standard) if the input value
+   is negative.
+
+   The fsqrts will set FPCC and FU (Floating Point Unordered or NaN)
+   to indicate that the input value was negative or NaN.  Use Move to
+   Condition Register from FPSCR to copy the FPCC field to cr1.  The
+   branch on summary overflow transfers control to w_sqrtf to process
+   any error conditions.  Otherwise we can return the result directly.
+
+   This part of the function is a leaf routine, so no need to stack a
+   frame or execute prologue/epilogue code.  This means it is safe to
+   transfer directly to w_sqrtf as long as the input value (f1) is
+   preserved.  Putting the sqrt result into f2 (float parameter 2)
+   allows passing both the input value and sqrt result into the extended
+   wrapper so there is no need to recompute.
+
+   This tactic avoids the overhead of stacking a frame for the normal
+   (non-error) case.  Until gcc supports prologue shrink-wrapping
+   this is the best we can do.
*/ + + .section ".text" + .machine power4 +EALIGN (__sqrtf, 5, 0) + fsqrts fp2,fp1 + mcrfs cr1,4 + bso- cr1,.Lw_sqrtf + fmr fp1,fp2 + blr + .align 4 +.Lw_sqrtf: + mflr r0 + stwu r1,-16(r1) + cfi_adjust_cfa_offset(16) + fmr fp12,fp2 + stw r0,20(r1) + stw r30,8(r1) + cfi_offset(lr,20-16) + cfi_offset(r30,8-16) +#ifdef SHARED + SETUP_GOT_ACCESS(r30,got_label) + addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwz r9,_LIB_VERSION@got(30) + lwz r0,0(r9) +#else + lis r9,_LIB_VERSION@ha + lwz r0,_LIB_VERSION@l(r9) +#endif +/* if (_LIB_VERSION == _IEEE_) return z; */ + cmpwi cr7,r0,-1 + beq- cr7,.L4 +/* if (x != x, 0) return z; !isnan */ + fcmpu cr7,fp1,fp1 + bne- cr7,.L4 +/* if (x < 0.0) + return __kernel_standard (x, x, 126) */ + fmr fp2,fp1 + fabs fp0,fp1 + li r3,126 + fcmpu cr7,1,0 + bne- cr7,.L11 +.L4: + lwz r0,20(r1) + fmr fp1,fp12 + lwz r30,8(r1) + addi r1,r1,16 + mtlr r0 + blr +.L11: + bl __kernel_standard@plt + fmr fp12,fp1 + b .L4 + END (__sqrtf) + +weak_alias (__sqrtf, sqrtf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/hp-timing.h b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/hp-timing.h new file mode 100644 index 0000000000..93cce4625a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/hp-timing.h @@ -0,0 +1,54 @@ +/* High precision, low overhead timing functions. powerpc64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +/* We always assume having the timestamp register. */ +#define HP_TIMING_AVAIL (1) +#define HP_SMALL_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 64bit values for the times. */ +typedef unsigned long long int hp_timing_t; + +/* That's quite simple. Use the `mftb' instruction. Note that the value + might not be 100% accurate since there might be some more instructions + running in this moment. This could be changed by using a barrier like + 'lwsync' right before the `mftb' instruction. But we are not interested + in accurate clock cycles here so we don't do this. */ + +#define HP_TIMING_NOW(Var) \ + do { \ + unsigned int hi, lo, tmp; \ + __asm__ __volatile__ ("1: mfspr %0,269;" \ + " mfspr %1,268;" \ + " mfspr %2,269;" \ + " cmpw %0,%2;" \ + " bne 1b;" \ + : "=&r" (hi), "=&r" (lo), "=&r" (tmp) \ + : : "cr0"); \ + Var = ((hp_timing_t) hi << 32) | lo; \ + } while (0) + +#include <hp-timing-common.h> + +#endif /* hp-timing.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcmp.S new file mode 100644 index 0000000000..c6270d347f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcmp.S @@ -0,0 +1,1375 @@ +/* Optimized strcmp implementation for PowerPC32. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + + .machine power4 +EALIGN (memcmp, 4, 0) + CALL_MCOUNT + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor r0, rSTR2, rSTR1 + cmplwi cr6, rN, 0 + cmplwi cr1, rN, 12 + clrlwi. r0, r0, 30 + clrlwi r12, rSTR1, 30 + cmplwi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + stwu 1, -64(r1) + cfi_adjust_cfa_offset(64) + stw rWORD8, 48(r1) + stw rWORD7, 44(r1) + cfi_offset(rWORD8, (48-64)) + cfi_offset(rWORD7, (44-64)) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. 
r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already word + aligned and can perform the word aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet word aligned). So we force the string addresses to the next lower + word boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (word aligned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first word. This ensures that the loop count is + correct and the first word (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrwi rSTR1, rSTR1, 2 + clrrwi rSTR2, rSTR2, 2 + beq cr5, L(Waligned) + add rN, rN, r12 + slwi rWORD6, r12, 3 + srwi r0, rN, 4 /* Divide by 16 */ + andi. r12, rN, 12 /* Get the word remainder */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 4 */ + .align 3 +L(dsP1): + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD2, rWORD6 + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 8 */ + .align 4 +L(dPs2): + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD2, rWORD6 + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. 
*/ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 12 */ + .align 4 +L(dPs3): + slw rWORD3, rWORD1, rWORD6 + slw rWORD4, rWORD2, rWORD6 + cmplw cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + slw rWORD1, rWORD1, rWORD6 + slw rWORD2, rWORD2, rWORD6 + cmplw cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(Waligned): + andi. r12, rN, 12 /* Get the word remainder */ + srwi r0, rN, 4 /* Divide by 16 */ + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 4 */ + .align 4 +L(dP1): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. 
*/ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) +#endif + cmplw cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +L(dP1e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + bne cr1, L(dLcr1) + cmplw cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) + .align 3 +L(dP1x): + slwi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). 
*/ + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 8 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP2): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 0(rSTR1) + lwz rWORD6, 0(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 +L(dP2e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) +/* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + slwi. r12, rN, 3 + bne cr6, L(dLcr6x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + bne cr1, L(dLcr1x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). 
*/ + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 12 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP3): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 0(rSTR1) + lwz rWORD4, 0(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +L(dP3e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + slwi. r12, rN, 3 + bne cr1, L(dLcr1x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(dLcr6x) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). 
*/ + bne cr7, L(dLcr7x) + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bne L(d00) + li rRTN, 0 + blr + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 + cfi_adjust_cfa_offset(64) +L(dP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 +L(dP4e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): +#ifdef 
__LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + bne- cr1, L(dLcr1) + cmplw cr7, rWORD1, rWORD2 + bdnz+ L(dLoop) + +L(dL4): + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmplw cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + slwi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 3 bytes to compare. Since + we are aligned it is safe to load the whole word, and use + shift right to eliminate bits beyond the compare length. */ +L(d00): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr7): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr7x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr1): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr1x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr1 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr6): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr6x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr6 + li rRTN, -1 + blr + .align 4 + cfi_adjust_cfa_offset(64) +L(dLcr5): + lwz rWORD7, 44(r1) + lwz rWORD8, 48(r1) +L(dLcr5x): + li rRTN, 1 + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN /* Power4 wants mtctr 
1st in dispatch group */ + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz- L(b11) + cmplw cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz- L(b12) + cmplw cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz- L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne- cr7, L(bLcr7) + + cmplw cr6, rWORD5, rWORD6 + bdz- L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne- cr1, L(bLcr1) + + cmplw cr7, rWORD1, rWORD2 + bdz- L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne- cr6, L(bLcr6) + + cmplw cr1, rWORD3, rWORD4 + bdnz+ L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. 
*/ +L(b1i): + bne- cr7, L(bLcr7) + bne- cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne- cr6, L(bLcr6) + bne- cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne- cr1, L(bLcr1) + bne- cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne- cr7, L(bx12) + bne- cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne- cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 2 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is word aligned and can + perform the Wunaligned loop. + + Otherwise we know that rSTR1 is not already word aligned yet. + So we can force the string addresses to the next lower word + boundary and special case this first word using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (Wualigned) compare loop, starting at the second word, + we need to adjust the length (rN) and special case the loop + versioning for the first W. This ensures that the loop count is + correct and the first W (shifted) is in the expected resister pair. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. 
*/ + cfi_adjust_cfa_offset(64) +L(unaligned): + stw rSHL, 40(r1) + cfi_offset(rSHL, (40-64)) + clrlwi rSHL, rSTR2, 30 + stw rSHR, 36(r1) + cfi_offset(rSHR, (36-64)) + beq cr5, L(Wunaligned) + stw rWORD8_SHIFT, 32(r1) + cfi_offset(rWORD8_SHIFT, (32-64)) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 W. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the W before that W that contains + the actual start of rSTR2. */ + clrrwi rSTR2, rSTR2, 2 + stw rWORD2_SHIFT, 28(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (W aligned) start of rSTR1. */ + clrlwi rSHL, rWORD8_SHIFT, 30 + clrrwi rSTR1, rSTR1, 2 + stw rWORD4_SHIFT, 24(r1) + slwi rSHL, rSHL, 3 + cmplw cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + slwi rWORD6, r12, 3 + stw rWORD6_SHIFT, 20(r1) + cfi_offset(rWORD2_SHIFT, (28-64)) + cfi_offset(rWORD4_SHIFT, (24-64)) + cfi_offset(rWORD6_SHIFT, (20-64)) + subfic rSHR, rSHL, 32 + srwi r0, rN, 4 /* Divide by 16 */ + andi. r12, rN, 12 /* Get the W remainder */ +/* We normally need to load 2 Ws to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a W where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. 
*/ + li rWORD8, 0 + blt cr5, L(dus0) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD8, 0(rSTR2) + addi rSTR2, rSTR2, 4 +#endif + slw rWORD8, rWORD8, rSHL + +L(dus0): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + srw r12, rWORD2, rSHR + clrlwi rN, rN, 30 + beq L(duPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 4 */ + .align 4 +L(dusP1): + slw rWORD8_SHIFT, rWORD2, rSHL + slw rWORD7, rWORD1, rWORD6 + slw rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmplw cr5, rWORD7, rWORD8 + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duPs2): + slw rWORD6_SHIFT, rWORD2, rSHL + slw rWORD5, rWORD1, rWORD6 + slw rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 12 */ + .align 4 +L(duPs3): + slw rWORD4_SHIFT, rWORD2, rSHL + slw rWORD3, rWORD1, rWORD6 + slw rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + slw rWORD2_SHIFT, rWORD2, rSHL + slw rWORD1, rWORD1, rWORD6 + slw rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is word aligned and the + compare length is at least 8 bytes. 
*/ + .align 4 +L(Wunaligned): + stw rWORD8_SHIFT, 32(r1) + clrrwi rSTR2, rSTR2, 2 + stw rWORD2_SHIFT, 28(r1) + srwi r0, rN, 4 /* Divide by 16 */ + stw rWORD4_SHIFT, 24(r1) + andi. r12, rN, 12 /* Get the W remainder */ + stw rWORD6_SHIFT, 20(r1) + cfi_offset(rWORD8_SHIFT, (32-64)) + cfi_offset(rWORD2_SHIFT, (28-64)) + cfi_offset(rWORD4_SHIFT, (24-64)) + cfi_offset(rWORD6_SHIFT, (20-64)) + slwi rSHL, rSHL, 3 +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD6, 0, rSTR2 + addi rSTR2, rSTR2, 4 + lwbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD6, 0(rSTR2) + lwzu rWORD8, 4(rSTR2) +#endif + cmplwi cr1, r12, 8 + cmplwi cr7, rN, 16 + clrlwi rN, rN, 30 + subfic rSHR, rSHL, 32 + slw rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 4 */ + .align 4 +L(duP1): + srw r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD7, 0(rSTR1) +#endif + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + bne cr7, 
L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmplw cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first word compare + complete and remainder of 0 to 3 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmplw cr5, rWORD7, rWORD8 + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 8(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 8 */ + .align 4 +L(duP2): + srw r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD5, 0(rSTR1) +#endif + or rWORD6, r0, rWORD6_SHIFT + slw rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 4(rSTR1) + lwz rWORD8, 4(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 8(rSTR1) + lwz rWORD2, 8(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 12(rSTR1) + lwz rWORD4, 12(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#endif + cmplw cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmplw cr5, rWORD7, rWORD8 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 
4 + addi rSTR2, rSTR2, 4 +#endif + bne cr6, L(duLcr6) + slwi. rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 12 */ + .align 4 +L(duP3): + srw r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD3, 0(rSTR1) +#endif + slw rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 4(rSTR1) + lwz rWORD6, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD7, 8(rSTR1) + lwz rWORD8, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 12(rSTR1) + lwz rWORD2, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + cmplw cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + slwi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 16, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + srw r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + addi rSTR1, rSTR1, 4 +#else + lwz rWORD1, 0(rSTR1) +#endif + slw rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 4(rSTR1) + lwz rWORD4, 4(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 8(rSTR1) + lwz rWORD6, 8(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 12(rSTR1) + lwzu rWORD8, 12(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmplw cr5, rWORD7, rWORD8 + bdz- L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 + lwbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD1, 4(rSTR1) + lwz rWORD2, 4(rSTR2) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srw r0, rWORD2, rSHR + slw rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT 
+L(duLoop1): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD3, 0, rSTR1 + lwbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD3, 8(rSTR1) + lwz rWORD4, 8(rSTR2) +#endif + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srw r12, rWORD4, rSHR + slw rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD5, 0, rSTR1 + lwbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD5, 12(rSTR1) + lwz rWORD6, 12(rSTR2) +#endif + cmplw cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srw r0, rWORD6, rSHR + slw rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD7, 0, rSTR1 + lwbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 +#else + lwzu rWORD7, 16(rSTR1) + lwzu rWORD8, 16(rSTR2) +#endif + cmplw cr7, rWORD1, rWORD2 + bne- cr1, L(duLcr1) + srw r12, rWORD8, rSHR + slw rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz+ L(duLoop) + +L(duL4): +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmplw cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmplw cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmplw cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + slwi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 3 bytes to compare. We use + shift right to eliminate bits beyond the compare length. + This allows the use of word subtract to compute the final result. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). 
*/ + cmplw cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 4 +#else + lwz rWORD2, 4(rSTR2) +#endif + srw r0, rWORD2, rSHR + .align 4 +L(dutrim): +#ifdef __LITTLE_ENDIAN__ + lwbrx rWORD1, 0, rSTR1 +#else + lwz rWORD1, 4(rSTR1) +#endif + lwz rWORD8, 48(r1) + subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + lwz rWORD7, 44(r1) + lwz rSHL, 40(r1) + srw rWORD1, rWORD1, rN + srw rWORD2, rWORD2, rN + lwz rSHR, 36(r1) + lwz rWORD8_SHIFT, 32(r1) + sub rRTN, rWORD1, rWORD2 + b L(dureturn26) + .align 4 +L(duLcr7): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + lwz rWORD8, 48(r1) + lwz rWORD7, 44(r1) +L(dureturn29): + lwz rSHL, 40(r1) + lwz rSHR, 36(r1) +L(dureturn27): + lwz rWORD8_SHIFT, 32(r1) +L(dureturn26): + lwz rWORD2_SHIFT, 28(r1) +L(dureturn25): + lwz rWORD4_SHIFT, 24(r1) + lwz rWORD6_SHIFT, 20(r1) + addi 1, 1, 64 + cfi_adjust_cfa_offset(-64) + blr +END (memcmp) + +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcopy.h b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcopy.h new file mode 100644 index 0000000000..c76739e390 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcopy.h 
@@ -0,0 +1,116 @@ +/* memcopy.h -- definitions for memory copy functions. Generic C version. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The strategy of the memory functions is: + + 1. Copy bytes until the destination pointer is aligned. + + 2. Copy words in unrolled loops. If the source and destination + are not aligned in the same way, use word memory operations, + but shift and merge two read words before writing. + + 3. Copy the few remaining bytes. + + This is fast on processors that have at least 10 registers for + allocation by GCC, and that can access memory at reg+const in one + instruction. + + I made an "exhaustive" test of this memmove when I wrote it, + exhaustive in the sense that I tried all alignment and length + combinations, with and without overlap. */ + +#include <sysdeps/generic/memcopy.h> + +/* The macros defined in this file are: + + BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy) + + BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy) + + WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy) + + WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy) + + MERGE(old_word, sh_1, new_word, sh_2) + [I fail to understand. 
I feel stupid. --roland] +*/ + + +/* Threshold value for when to enter the unrolled loops. */ +#undef OP_T_THRES +#define OP_T_THRES 16 + +/* Copy exactly NBYTES bytes from SRC_BP to DST_BP, + without any assumptions about alignment of the pointers. */ +#undef BYTE_COPY_FWD +#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \ + do \ + { \ + size_t __nbytes = (nbytes); \ + if (__nbytes & 1) \ + { \ + ((byte *) dst_bp)[0] = ((byte *) src_bp)[0]; \ + src_bp += 1; \ + dst_bp += 1; \ + __nbytes -= 1; \ + } \ + while (__nbytes > 0) \ + { \ + byte __x = ((byte *) src_bp)[0]; \ + byte __y = ((byte *) src_bp)[1]; \ + src_bp += 2; \ + __nbytes -= 2; \ + ((byte *) dst_bp)[0] = __x; \ + ((byte *) dst_bp)[1] = __y; \ + dst_bp += 2; \ + } \ + } while (0) + +/* Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, + beginning at the bytes right before the pointers and continuing towards + smaller addresses. Don't assume anything about alignment of the + pointers. */ +#undef BYTE_COPY_BWD +#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \ + do \ + { \ + size_t __nbytes = (nbytes); \ + if (__nbytes & 1) \ + { \ + src_ep -= 1; \ + dst_ep -= 1; \ + ((byte *) dst_ep)[0] = ((byte *) src_ep)[0]; \ + __nbytes -= 1; \ + } \ + while (__nbytes > 0) \ + { \ + byte __x, __y; \ + src_ep -= 2; \ + __y = ((byte *) src_ep)[1]; \ + __x = ((byte *) src_ep)[0]; \ + dst_ep -= 2; \ + __nbytes -= 2; \ + ((byte *) dst_ep)[1] = __y; \ + ((byte *) dst_ep)[0] = __x; \ + } \ + } while (0) + +/* The powerpc memcpy implementation is safe to use for memmove. */ +#undef MEMCPY_OK_FOR_FWD_MEMMOVE +#define MEMCPY_OK_FOR_FWD_MEMMOVE 1 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcpy.S new file mode 100644 index 0000000000..37bc712dd9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memcpy.S @@ -0,0 +1,481 @@ +/* Optimized memcpy implementation for PowerPC32 on PowerPC64. 
+ Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination word (4-byte) aligned. Further optimization is + possible when both source and destination are word aligned. + Each case has an optimized unrolled loop. */ + + .machine power4 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + stw 30,20(1) + cfi_offset(30,(20-32)) + mr 30,3 + cmplwi cr1,5,31 + stw 31,24(1) + cfi_offset(31,(24-32)) + neg 0,3 + andi. 11,3,3 /* check alignment of dst. */ + clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */ + clrlwi 10,4,30 /* check alignment of src. */ + cmplwi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + cmplw cr6,10,11 + mr 12,4 + srwi 9,5,2 /* Number of full words remaining. 
*/ + mtcrf 0x01,0 + mr 31,5 + beq .L0 + + subf 31,0,5 + /* Move 0-3 bytes as needed to get the destination word aligned. */ +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,0f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +0: + clrlwi 10,12,30 /* check alignment of src again. */ + srwi 9,31,2 /* Number of full words remaining. */ + + /* Copy words from source to destination, assuming the destination is + aligned on a word boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also word aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are word aligned, and we can use + the optimized word copy loop. */ +.L0: + clrlwi 11,31,30 /* calculate the number of tail bytes */ + mtcrf 0x01,9 + bne- cr6,.L6 /* If source is not word aligned. */ + + /* Move words where destination and source are word aligned. + Use an unrolled loop to copy 4 words (16-bytes) per iteration. + If the copy is not an exact multiple of 16 bytes, 1-3 + words are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-3 bytes. These bytes are + copied a halfword/byte at a time as needed to preserve alignment. 
*/ + + srwi 8,31,4 /* calculate the 16 byte loop count */ + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + bf 30,1f + lwz 6,0(12) + lwz 7,4(12) + addi 11,12,8 + mtctr 8 + stw 6,0(3) + stw 7,4(3) + addi 10,3,8 + bf 31,4f + lwz 0,8(12) + stw 0,8(3) + blt cr1,3f + addi 11,12,12 + addi 10,3,12 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + lwz 6,0(12) + addi 11,12,4 + stw 6,0(3) + addi 10,3,4 + + .align 4 +4: + lwz 6,0(11) + lwz 7,4(11) + lwz 8,8(11) + lwz 0,12(11) + stw 6,0(10) + stw 7,4(10) + stw 8,8(10) + stw 0,12(10) + addi 11,11,16 + addi 10,10,16 + bdnz 4b +3: + clrrwi 0,31,2 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-3 bytes and we know that the + destination is word aligned. */ +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +/* Copy up to 31 bytes. This is divided into two cases 0-8 bytes and + 9-31 bytes. Each case is handled without loops, using binary + (1,2,4,8) tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 64-byte, and + 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte + boundaries it is more important to align the source than the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. While the destination and stores may + still be unaligned, this is only an issue for page (4096 byte + boundary) crossing, which should be rare for these short moves. 
+ The hardware handles this case automatically with a small delay. */ + + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrwi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmplwi cr1,5,16 + mr 10,5 + mr 12,4 + cmplwi cr6,0,2 + beq .L3 /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srwi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmplwi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +.L3: +/* At least 6 bytes left and the source is word aligned. */ + blt cr1,8f +16: /* Move 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 6,8(12) + stw 7,4(3) + lwz 7,12(12) + addi 12,12,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 +8: /* Move 8 bytes. */ + bf 28,4f + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Move 4 bytes. */ + bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr +1: /* Move 1 byte. */ + bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,4f + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. 
*/ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + .align 4 +4: bf 29,2b + lwz 6,0(4) + stw 6,0(3) +6: + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + .align 4 +5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) + .align 4 +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +.L6: + + /* Copy words where the destination is aligned but the source is + not. Use aligned word loads from the source, shifted to realign + the data, to allow aligned destination stores. + Use an unrolled loop to copy 4 words (16-bytes) per iteration. + A single word is retained for storing at loop exit to avoid walking + off the end of a page within the loop. + If the copy is not an exact multiple of 16 bytes, 1-3 + words are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-3 bytes. These bytes are + copied a halfword/byte at a time as needed to preserve alignment. */ + + + cmplwi cr6,11,0 /* are there tail bytes left ? 
*/ + subf 5,10,12 /* back up src pointer to prev word alignment */ + slwi 10,10,3 /* calculate number of bits to shift 1st word left */ + addi 11,9,-1 /* we move one word after the loop */ + srwi 8,11,2 /* calculate the 16 byte loop count */ + lwz 6,0(5) /* load 1st src word into R6 */ + mr 4,3 + lwz 7,4(5) /* load 2nd src word into R7 */ + mtcrf 0x01,11 + subfic 9,10,32 /* number of bits to shift 2nd word right */ + mtctr 8 + bf 30,1f + + /* there are at least two words to copy, so copy them */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 /* shift 1st src word to left align it in R0 */ + srw 8,7,9 /* shift 2nd src word to right align it in R8 */ +#endif + or 0,0,8 /* or them to get word to store */ + lwz 6,8(5) /* load the 3rd src word */ + stw 0,0(4) /* store the 1st dst word */ +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else + slw 0,7,10 /* now left align 2nd src word into R0 */ + srw 8,6,9 /* shift 3rd src word to right align it in R8 */ +#endif + or 0,0,8 /* or them to get word to store */ + lwz 7,12(5) + stw 0,4(4) /* store the 2nd dst word */ + addi 4,4,8 + addi 5,5,16 + bf 31,4f + /* there is a third word to copy, so copy it */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 /* shift 3rd src word to left align it in R0 */ + srw 8,7,9 /* shift 4th src word to right align it in R8 */ +#endif + or 0,0,8 /* or them to get word to store */ + stw 0,0(4) /* store 3rd dst word */ + mr 6,7 + lwz 7,0(5) + addi 5,5,4 + addi 4,4,4 + b 4f + .align 4 +1: +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 /* shift 1st src word to left align it in R0 */ + srw 8,7,9 /* shift 2nd src word to right align it in R8 */ +#endif + addi 5,5,8 + or 0,0,8 /* or them to get word to store */ + bf 31,4f + mr 6,7 + lwz 7,0(5) + addi 5,5,4 + stw 0,0(4) /* store the 1st dst word */ + addi 4,4,4 + + .align 4 +4: + /* copy 16 bytes at a time */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 
+ srw 8,7,9 +#endif + or 0,0,8 + lwz 6,0(5) + stw 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else + slw 0,7,10 + srw 8,6,9 +#endif + or 0,0,8 + lwz 7,4(5) + stw 0,4(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 + srw 8,7,9 +#endif + or 0,0,8 + lwz 6,8(5) + stw 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srw 0,7,10 + slw 8,6,9 +#else + slw 0,7,10 + srw 8,6,9 +#endif + or 0,0,8 + lwz 7,12(5) + stw 0,12(4) + addi 5,5,16 + addi 4,4,16 + bdnz+ 4b +8: + /* calculate and store the final word */ +#ifdef __LITTLE_ENDIAN__ + srw 0,6,10 + slw 8,7,9 +#else + slw 0,6,10 + srw 8,7,9 +#endif + or 0,0,8 + stw 0,0(4) +3: + clrrwi 0,31,2 + mtcrf 0x01,31 + bne cr6,.L9 /* If the tail is 0 bytes we are done! */ + + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr +END (memcpy) + +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memset.S new file mode 100644 index 0000000000..25319f7233 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/memset.S @@ -0,0 +1,226 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (1024 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + + .machine power4 +EALIGN (memset, 5, 0) + CALL_MCOUNT + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size (known to be 128). */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmplwi cr1, rLEN, 4 + andi. rALIGN, rMEMP0, 3 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to word boundary. */ + cmplwi cr5, rLEN, 31 + insrwi rCHR, rCHR, 8, 16 /* Replicate byte to halfword. */ + beq+ L(aligned) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned): + mtcrf 0x01, rLEN + insrwi rCHR, rCHR, 16, 0 /* Replicate halfword to word. */ + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi.
rALIGN, rMEMP, 0x1C + subfic rALIGN, rALIGN, 0x20 + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stw rCHR, -4(rMEMP2) + stwu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + stw rCHR, -4(rMEMP2) + stw rCHR, -8(rMEMP2) + stw rCHR, -12(rMEMP2) + stwu rCHR, -16(rMEMP2) +L(a2): bf 29, L(caligned) + stw rCHR, -4(rMEMP2) + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmplwi cr1, rCHR, 0 + clrrwi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srwi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrlwi. rLEN, rLEN, 27 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + + .align 4 +L(c3): dcbtst rNEG64, rMEMP + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) + stw rCHR, -20(rMEMP) + stw rCHR, -24(rMEMP) + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stw rCHR, -16(rMEMP) + cmplwi cr1, rLEN, 16 + stw rCHR, -20(rMEMP) + stw rCHR, -24(rMEMP) + stw rCHR, -28(rMEMP) + stwu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less than 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + li rCLS,128 /* cache line size is 128 */ + dcbt 0,rMEMP +L(getCacheAligned): + cmplwi cr1,rLEN,32 + andi. rTMP,rMEMP,127 + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + stw rCHR,-32(rMEMP) + stw rCHR,-28(rMEMP) + stw rCHR,-24(rMEMP) + stw rCHR,-20(rMEMP) + stw rCHR,-16(rMEMP) + stw rCHR,-12(rMEMP) + stw rCHR,-8(rMEMP) + stw rCHR,-4(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. 
*/ + .align 4 +L(cacheAligned): + cmplw cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and the remainder + (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 4 bytes or less. */ + cmplwi cr5, rLEN, 1 + cmplwi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + cmplwi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stwu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile new file mode 100644 index 0000000000..bd9d360efa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/Makefile @@ -0,0 +1,30 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + memcpy-ppc32 memcmp-power7 memcmp-ppc32 memset-power7 \ + memset-power6 memset-ppc32 bzero-power7 bzero-power6 \ + bzero-ppc32 mempcpy-power7 mempcpy-ppc32 memchr-power7 \ + memchr-ppc32 
memrchr-power7 memrchr-ppc32 rawmemchr-power7 \ + rawmemchr-ppc32 strlen-power7 strlen-ppc32 strnlen-power7 \ + strnlen-ppc32 strncmp-power7 strncmp-ppc32 \ + strcasecmp-power7 strcasecmp_l-power7 strncase-power7 \ + strncase_l-power7 strchrnul-power7 strchrnul-ppc32 \ + strchr-power7 strchr-ppc32 \ + wordcopy-power7 wordcopy-ppc32 \ + memmove-power7 memmove-ppc + +CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops +CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +endif + +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcschr-power7 wcschr-power6 wcschr-ppc32 \ + wcsrchr-power7 wcsrchr-power6 wcsrchr-ppc32 \ + wcscpy-power7 wcscpy-power6 wcscpy-ppc32 + +CFLAGS-wcschr-power7.c += -mcpu=power7 +CFLAGS-wcschr-power6.c += -mcpu=power6 +CFLAGS-wcsrchr-power7.c += -mcpu=power7 +CFLAGS-wcsrchr-power6.c += -mcpu=power6 +CFLAGS-wcscpy-power7.c += -mcpu=power7 +CFLAGS-wcscpy-power6.c += -mcpu=power6 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power6.S new file mode 100644 index 0000000000..4e000309cf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power6.S @@ -0,0 +1,26 @@ +/* Optimized bzero implementation for PowerPC32/POWER6. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__bzero_power6) + mr r5,r4 + li r4,0 + b __memset_power6@local +END (__bzero_power6) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power7.S new file mode 100644 index 0000000000..580da55166 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-power7.S @@ -0,0 +1,26 @@ +/* Optimized bzero implementation for PowerPC32/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__bzero_power7) + mr r5,r4 + li r4,0 + b __memset_power7@local +END (__bzero_power7) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S new file mode 100644 index 0000000000..33c69cbfb9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero-ppc32.S @@ -0,0 +1,35 @@ +/* Optimized bzero implementation for PowerPC32/PPC32. 
+ Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* memset ifunc selector is not built for static and memset@local + for shared builds makes the linker point the call to the ifunc + selector. */ +#ifdef SHARED +# define MEMSET __memset_ppc +#else +# define MEMSET memset +#endif + +ENTRY (__bzero_ppc) + mr r5,r4 + li r4,0 + b MEMSET@local +END (__bzero_ppc) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c new file mode 100644 index 0000000000..865920ee26 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c @@ -0,0 +1,37 @@ +/* Multiple versions of bzero. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# include <string.h> +# include <strings.h> +# include "init-arch.h" + +extern __typeof (bzero) __bzero_ppc attribute_hidden; +extern __typeof (bzero) __bzero_power6 attribute_hidden; +extern __typeof (bzero) __bzero_power7 attribute_hidden; + +libc_ifunc (__bzero, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bzero_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __bzero_power6 + : __bzero_ppc); + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..1caf15a07d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/ifunc-impl-list.c @@ -0,0 +1,224 @@ +/* Enumerate available IFUNC implementations of a function. PowerPC32 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <ifunc-impl-list.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 6 + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); + + size_t i = 0; + + unsigned long int hwcap = GLRO(dl_hwcap); + /* hwcap contains only the latest supported ISA, the code checks which is + and fills the previous supported ones. */ + if (hwcap & PPC_FEATURE_ARCH_2_06) + hwcap |= PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_POWER5_PLUS | + PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_ARCH_2_05) + hwcap |= PPC_FEATURE_POWER5_PLUS | PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5_PLUS) + hwcap |= PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5) + hwcap |= PPC_FEATURE_POWER4; + +#ifdef SHARED + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c. */ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX, + __memcpy_power7) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06, + __memcpy_a2) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_05, + __memcpy_power6) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE, + __memcpy_cell) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c. */ + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX, + __memmove_power7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memset.c. 
*/ + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, + __memset_power7) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, + __memset_power6) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c. */ + IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, + __bzero_power7) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, + __bzero_power6) + IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c. */ + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, hwcap & PPC_FEATURE_HAS_VSX, + __strlen_power7) + IFUNC_IMPL_ADD (array, i, strlen, 1, + __strlen_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c. */ + IFUNC_IMPL (i, name, strnlen, + IFUNC_IMPL_ADD (array, i, strnlen, hwcap & PPC_FEATURE_HAS_VSX, + __strnlen_power7) + IFUNC_IMPL_ADD (array, i, strnlen, 1, + __strnlen_ppc)) + + /* Support sysdeps/powerpc/powerpc32/multiarch/strncmp.c. */ + IFUNC_IMPL (i, name, strncmp, + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX, + __strncmp_power7) + IFUNC_IMPL_ADD (array, i, strncmp, 1, + __strncmp_ppc)) +#endif + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c. */ + IFUNC_IMPL (i, name, memcmp, + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_HAS_VSX, + __memcmp_power7) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c. */ + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + hwcap & PPC_FEATURE_HAS_VSX, + __mempcpy_power7) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, + __mempcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c. 
*/ + IFUNC_IMPL (i, name, memchr, + IFUNC_IMPL_ADD (array, i, memchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memchr_power7) + IFUNC_IMPL_ADD (array, i, memchr, 1, + __memchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c. */ + IFUNC_IMPL (i, name, memrchr, + IFUNC_IMPL_ADD (array, i, memrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memrchr_power7) + IFUNC_IMPL_ADD (array, i, memrchr, 1, + __memrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, + hwcap & PPC_FEATURE_HAS_VSX, + __rawmemchr_power7) + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, + __rawmemchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c. */ + IFUNC_IMPL (i, name, strcasecmp, + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp_l, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, + __strcasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c. */ + IFUNC_IMPL (i, name, strncasecmp, + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c. */ + IFUNC_IMPL (i, name, strncasecmp_l, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, + __strncasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c. 
*/ + IFUNC_IMPL (i, name, strchrnul, + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap & PPC_FEATURE_HAS_VSX, + __strchrnul_power7) + IFUNC_IMPL_ADD (array, i, strchrnul, 1, + __strchrnul_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c. */ + IFUNC_IMPL (i, name, strchr, + IFUNC_IMPL_ADD (array, i, strchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strchr_power7) + IFUNC_IMPL_ADD (array, i, strchr, 1, + __strchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcschr_power7) + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcschr_power6) + IFUNC_IMPL_ADD (array, i, wcschr, 1, + __wcschr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c. */ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcsrchr_power7) + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcsrchr_power6) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, + __wcsrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c. */ + IFUNC_IMPL (i, name, wcscpy, + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_HAS_VSX, + __wcscpy_power7) + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcscpy_power6) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, + __wcscpy_ppc)) + + return i; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h new file mode 100644 index 0000000000..f2e6a4b705 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h @@ -0,0 +1,53 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ldsodefs.h> + +/* The code checks if _rtld_global_ro was relocated before trying to access + the dl_hwcap field. The assembly is to make the compiler not optimize the + test (&_rtld_global_ro != NULL), which is always true in ISO C (but not + in that case since _rtld_global_ro might not have been relocated yet). */ +#if defined(SHARED) && !IS_IN (rtld) +# define __GLRO(value) \ + ({ volatile void **__p = (volatile void**)(&_rtld_global_ro); \ + unsigned long int __ret; \ + asm ("# x in %0" : "+r" (__p)); \ + __ret = (__p) ? GLRO(value) : 0; \ + __ret; }) +#else +# define __GLRO(value) GLRO(value) +#endif + +/* dl_hwcap contains only the latest supported ISA, the macro checks which one it is 
*/ +#define INIT_ARCH() \ + unsigned long int hwcap = __GLRO(dl_hwcap); \ + unsigned long int __attribute__((unused)) hwcap2 = __GLRO(dl_hwcap2); \ + if (hwcap & PPC_FEATURE_ARCH_2_06) \ + hwcap |= PPC_FEATURE_ARCH_2_05 | \ + PPC_FEATURE_POWER5_PLUS | \ + PPC_FEATURE_POWER5 | \ + PPC_FEATURE_POWER4; \ + else if (hwcap & PPC_FEATURE_ARCH_2_05) \ + hwcap |= PPC_FEATURE_POWER5_PLUS | \ + PPC_FEATURE_POWER5 | \ + PPC_FEATURE_POWER4; \ + else if (hwcap & PPC_FEATURE_POWER5_PLUS) \ + hwcap |= PPC_FEATURE_POWER5 | \ + PPC_FEATURE_POWER4; \ + else if (hwcap & PPC_FEATURE_POWER5) \ + hwcap |= PPC_FEATURE_POWER4; diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-power7.S new file mode 100644 index 0000000000..e7eb56a8fc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-power7.S @@ -0,0 +1,40 @@ +/* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__memchr_power7); \ + .type C_SYMBOL_NAME(__memchr_power7),@function; \ + C_LABEL(__memchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c new file mode 100644 index 0000000000..1e4b88f9e9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr-ppc32.c @@ -0,0 +1,34 @@ +/* PowerPC32 default implementation of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define MEMCHR __memchr_ppc + +#undef weak_alias +#define weak_alias(a, b) + +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__memchr_ppc, __GI_memchr, __memchr_ppc); +#endif + +extern __typeof (memchr) __memchr_ppc attribute_hidden; + +#include <string/memchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c new file mode 100644 index 0000000000..7eb4be7248 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c @@ -0,0 +1,41 @@ +/* Multiple versions of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# undef memchr +/* Redefine memchr so that the compiler won't make the weak_alias point + to internal hidden definition (__GI_memchr), since PPC32 does not + support local IFUNC calls. 
*/ +# define memchr __redirect_memchr +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memchr) __memchr_ppc attribute_hidden; +extern __typeof (__redirect_memchr) __memchr_power7 attribute_hidden; + +extern __typeof (__redirect_memchr) __libc_memchr; + +libc_ifunc (__libc_memchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memchr_power7 + : __memchr_ppc); +#undef memchr +weak_alias (__libc_memchr, memchr) +#else +#include <string/memchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-power7.S new file mode 100644 index 0000000000..e002aef057 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-power7.S @@ -0,0 +1,41 @@ +/* Optimized memcmp implementation for POWER7/PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcmp_power7); \ + .type C_SYMBOL_NAME(__memcmp_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcmp_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcmp_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef weak_alias +#define weak_alias(a, b) + +#include <sysdeps/powerpc/powerpc32/power7/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S new file mode 100644 index 0000000000..dc1f21bcb5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S @@ -0,0 +1,45 @@ +/* Default memcmp implementation for PowerPC32. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#if IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcmp_ppc); \ + .type C_SYMBOL_NAME(__memcmp_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcmp_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcmp_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcmp; __GI_memcmp = __memcmp_ppc + +# undef weak_alias +# define weak_alias(a, b) \ + .weak b ; b = __memcmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c new file mode 100644 index 0000000000..00bbcfaa4c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c @@ -0,0 +1,36 @@ +/* Multiple versions of memcmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. 
*/ +#if IS_IN (libc) +# define memcmp __redirect_memcmp +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; +extern __typeof (memcmp) __memcmp_power7 attribute_hidden; +# undef memcmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memcmp, memcmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcmp_power7 + : __memcmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-a2.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-a2.S new file mode 100644 index 0000000000..17a31226c5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-a2.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_a2); \ + .type C_SYMBOL_NAME(__memcpy_a2),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_a2) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_a2) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/a2/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-cell.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-cell.S new file mode 100644 index 0000000000..59859c6b94 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-cell.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for CELL BE PowerPC. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_cell); \ + .type C_SYMBOL_NAME(__memcpy_cell),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_cell) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_cell) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/cell/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power6.S new file mode 100644 index 0000000000..750151973b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power6.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for PowerPC32 on POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_power6); \ + .type C_SYMBOL_NAME(__memcpy_power6),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_power6) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_power6) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power6/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power7.S new file mode 100644 index 0000000000..3ac7c32084 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-power7.S @@ -0,0 +1,38 @@ +/* Optimized memcpy implementation for PowerPC32/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_power7); \ + .type C_SYMBOL_NAME(__memcpy_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S new file mode 100644 index 0000000000..f018684155 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S @@ -0,0 +1,41 @@ +/* Default memcpy implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memcpy_ppc); \ + .type C_SYMBOL_NAME(__memcpy_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memcpy_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memcpy_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcpy; __GI_memcpy = __memcpy_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c new file mode 100644 index 0000000000..b414ba946b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c @@ -0,0 +1,48 @@ +/* Multiple versions of memcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memcpy before the initialization + happened. 
*/ +#if defined SHARED && IS_IN (libc) +# undef memcpy +# define memcpy __redirect_memcpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (memcpy) __memcpy_cell attribute_hidden; +extern __typeof (memcpy) __memcpy_power6 attribute_hidden; +extern __typeof (memcpy) __memcpy_a2 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +# undef memcpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memcpy, memcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcpy_power7 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memcpy_a2 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memcpy_power6 + : (hwcap & PPC_FEATURE_CELL_BE) + ? __memcpy_cell + : __memcpy_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c new file mode 100644 index 0000000000..12902fec3f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-power7.c @@ -0,0 +1,41 @@ +/* Power7 multiarch memmove. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. 
If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <memcopy.h> + +extern __typeof (_wordcopy_fwd_aligned) _wordcopy_fwd_aligned_power7; +extern __typeof (_wordcopy_fwd_dest_aligned) _wordcopy_fwd_dest_aligned_power7; +extern __typeof (_wordcopy_bwd_aligned) _wordcopy_bwd_aligned_power7; +extern __typeof (_wordcopy_bwd_dest_aligned) _wordcopy_bwd_dest_aligned_power7; + +#define _wordcopy_fwd_aligned _wordcopy_fwd_aligned_power7 +#define _wordcopy_fwd_dest_aligned _wordcopy_fwd_dest_aligned_power7 +#define _wordcopy_bwd_aligned _wordcopy_bwd_aligned_power7 +#define _wordcopy_bwd_dest_aligned _wordcopy_bwd_dest_aligned_power7 + +extern __typeof (memcpy) __memcpy_power7; +#define memcpy __memcpy_power7 + +extern __typeof (memmove) __memmove_power7; +#define MEMMOVE __memmove_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/memmove.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c new file mode 100644 index 0000000000..59f298507c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove-ppc.c @@ -0,0 +1,44 @@ +/* Default memmove implementation for PowerPC32. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <memcopy.h> + +extern __typeof (_wordcopy_fwd_aligned) _wordcopy_fwd_aligned_ppc; +extern __typeof (_wordcopy_fwd_dest_aligned) _wordcopy_fwd_dest_aligned_ppc; +extern __typeof (_wordcopy_bwd_aligned) _wordcopy_bwd_aligned_ppc; +extern __typeof (_wordcopy_bwd_dest_aligned) _wordcopy_bwd_dest_aligned_ppc; + +#define _wordcopy_fwd_aligned _wordcopy_fwd_aligned_ppc +#define _wordcopy_fwd_dest_aligned _wordcopy_fwd_dest_aligned_ppc +#define _wordcopy_bwd_aligned _wordcopy_bwd_aligned_ppc +#define _wordcopy_bwd_dest_aligned _wordcopy_bwd_dest_aligned_ppc + +extern __typeof (memcpy) __memcpy_ppc; +#define memcpy __memcpy_ppc + +extern __typeof (memmove) __memmove_ppc; +#define MEMMOVE __memmove_ppc + +#if defined SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__memmove_ppc, __GI_memmove, __memmove_ppc); +#endif + +#include <string/memmove.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c new file mode 100644 index 0000000000..481139fae8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c @@ -0,0 +1,36 @@ +/* Multiple versions of memmove. PowerPC32 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# define memmove __redirect_memmove +# include <string.h> +# include "init-arch.h" + +extern __typeof (memmove) __memmove_ppc attribute_hidden; +extern __typeof (memmove) __memmove_power7 attribute_hidden; +# undef memmove + +libc_ifunc_redirected (__redirect_memmove, memmove, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memmove_power7 + : __memmove_ppc); +#else +# include <string/memmove.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-power7.S new file mode 100644 index 0000000000..a1a078dec6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-power7.S @@ -0,0 +1,35 @@ +/* Optimized mempcpy implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__mempcpy_power7); \ + .type C_SYMBOL_NAME(__mempcpy_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__mempcpy_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__mempcpy_power7) + +#include <sysdeps/powerpc/powerpc32/power7/mempcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c new file mode 100644 index 0000000000..2a20060e5b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c @@ -0,0 +1,32 @@ +/* PowerPC32 default implementation of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define MEMPCPY __mempcpy_ppc + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef weak_alias +#define weak_alias(a, b) + +#if defined SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__mempcpy_ppc, __GI_mempcpy, __mempcpy_ppc); +#endif + +#include <string/mempcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c new file mode 100644 index 0000000000..0c7250a4bf --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c @@ -0,0 +1,44 @@ +/* Multiple versions of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# define NO_MEMPCPY_STPCPY_REDIRECT +/* Omit the mempcpy inline definitions because it would redefine mempcpy. 
*/ +# define _HAVE_STRING_ARCH_mempcpy 1 +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__mempcpy) __mempcpy_ppc attribute_hidden; +extern __typeof (__mempcpy) __mempcpy_power7 attribute_hidden; +# undef mempcpy +# undef __mempcpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___mempcpy, __mempcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __mempcpy_power7 + : __mempcpy_ppc); + +weak_alias (__mempcpy, mempcpy) +#else +# include <string/mempcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-power7.S new file mode 100644 index 0000000000..4c3f6af9f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-power7.S @@ -0,0 +1,40 @@ +/* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__memrchr_power7); \ + .type C_SYMBOL_NAME(__memrchr_power7),@function; \ + C_LABEL(__memrchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memrchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c new file mode 100644 index 0000000000..a0247f49c8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c @@ -0,0 +1,25 @@ +/* PowerPC32 default implementation of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# define MEMRCHR __memrchr_ppc +# include <string.h> +extern void *__memrchr_ppc (const void *, int, size_t); +#endif + +#include <string/memrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c new file mode 100644 index 0000000000..fb09fdf89c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; +extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__memrchr_power7 + : __memrchr_ppc); + +weak_alias (__memrchr, memrchr) +#else +#include <string/memrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power6.S new file mode 100644 index 0000000000..55ff437a20 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power6.S @@ -0,0 +1,38 @@ +/* Optimized 32-bit memset implementation for POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memset_power6); \ + .type C_SYMBOL_NAME(__memset_power6),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memset_power6) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memset_power6) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power6/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power7.S new file mode 100644 index 0000000000..ced4cb015b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-power7.S @@ -0,0 +1,38 @@ +/* Optimized memset implementation for PowerPC32/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memset_power7); \ + .type C_SYMBOL_NAME(__memset_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memset_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memset_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S new file mode 100644 index 0000000000..63cd5b4eea --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S @@ -0,0 +1,41 @@ +/* Default memset implementation for PowerPC32. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__memset_ppc); \ + .type C_SYMBOL_NAME(__memset_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__memset_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__memset_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_memset; __GI_memset = __memset_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset.c new file mode 100644 index 0000000000..afcca12c78 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/memset.c @@ -0,0 +1,39 @@ +/* Multiple versions of memset. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. 
*/ +#if defined SHARED && IS_IN (libc) +# define memset __redirect_memset +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memset) __memset_ppc attribute_hidden; +extern __typeof (memset) __memset_power6 attribute_hidden; +extern __typeof (memset) __memset_power7 attribute_hidden; +# undef memset + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memset, memset, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memset_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memset_power6 + : __memset_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-power7.S new file mode 100644 index 0000000000..e088c6b046 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-power7.S @@ -0,0 +1,40 @@ +/* Optimized rawmemchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__rawmemchr_power7); \ + .type C_SYMBOL_NAME(__rawmemchr_power7),@function; \ + C_LABEL(__rawmemchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__rawmemchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/rawmemchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c new file mode 100644 index 0000000000..bce76cbe75 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c @@ -0,0 +1,32 @@ +/* PowerPC32 default implementation of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define RAWMEMCHR __rawmemchr_ppc +#undef weak_alias +#define weak_alias(a, b) +#ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__rawmemchr_ppc, __GI___rawmemchr, __rawmemchr_ppc); +#endif + +extern __typeof (rawmemchr) __rawmemchr_ppc attribute_hidden; + +#include <string/rawmemchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c new file mode 100644 index 0000000000..6ea56db0af --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c @@ -0,0 +1,38 @@ +/* Multiple versions of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define __rawmemchr __redirect___rawmemchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__rawmemchr) __rawmemchr_ppc attribute_hidden; +extern __typeof (__rawmemchr) __rawmemchr_power7 attribute_hidden; +# undef __rawmemchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___rawmemchr, __rawmemchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__rawmemchr_power7 + : __rawmemchr_ppc); +weak_alias (__rawmemchr, rawmemchr) +#else +#include <string/rawmemchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memcmp.S new file mode 100644 index 0000000000..b676dd147e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memcmp.S @@ -0,0 +1,19 @@ +/* Loader memcmp implementation for PowerPC32. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memset.S new file mode 100644 index 0000000000..b9eb81328f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-memset.S @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strchr.S new file mode 100644 index 0000000000..5d197557af --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strchr.S @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strnlen.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strnlen.c new file mode 100644 index 0000000000..79704aa2d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/rtld-strnlen.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string/strnlen.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S new file mode 100644 index 0000000000..f9324a972e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp-power7.S @@ -0,0 +1,39 @@ +/* Optimized strcasecmp implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strcasecmp_power7); \ + .type C_SYMBOL_NAME(__strcasecmp_power7),@function; \ + C_LABEL(__strcasecmp_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strcasecmp_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c new file mode 100644 index 0000000000..da7d8415d2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c @@ -0,0 +1,41 @@ +/* Multiple versions of strcasecmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# include <string.h> +# define strcasecmp __strcasecmp_ppc + +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; +#endif + +#include <string/strcasecmp.c> +#undef strcasecmp + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasecmp) __libc_strcasecmp; +libc_ifunc (__libc_strcasecmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_power7 + : __strcasecmp_ppc); + +weak_alias (__libc_strcasecmp, strcasecmp) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S new file mode 100644 index 0000000000..66e0584139 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l-power7.S @@ -0,0 +1,41 @@ +/* Default strcasecmp implementation for PowerPC32. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strcasecmp_l_power7); \ + .type C_SYMBOL_NAME(__strcasecmp_l_power7),@function; \ + C_LABEL(__strcasecmp_l_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strcasecmp_l_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define USE_IN_EXTENDED_LOCALE_MODEL + +#include <sysdeps/powerpc/powerpc32/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c new file mode 100644 index 0000000000..85411f5558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c @@ -0,0 +1,41 @@ +/* Multiple versions of strcasecmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# include <string.h> +# define strcasecmp_l __strcasecmp_l_ppc + +extern __typeof (__strcasecmp_l) __strcasecmp_l_ppc attribute_hidden; +extern __typeof (__strcasecmp_l) __strcasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strcasecmp_l.c> +#undef strcasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasecmp_l) __libc_strcasecmp_l; +libc_ifunc (__libc_strcasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_l_power7 + : __strcasecmp_l_ppc); + +weak_alias (__libc_strcasecmp_l, strcasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-power7.S new file mode 100644 index 0000000000..7624a27bbd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-power7.S @@ -0,0 +1,39 @@ +/* Optimized strchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strchr_power7); \ + .type C_SYMBOL_NAME(__strchr_power7),@function; \ + C_LABEL(__strchr_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strchr_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-ppc32.S new file mode 100644 index 0000000000..7dcd55af59 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr-ppc32.S @@ -0,0 +1,41 @@ +/* PowerPC32 default implementation of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#ifdef SHARED +# undef ENTRY +# define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strchr_ppc); \ + .type C_SYMBOL_NAME(__strchr_ppc),@function; \ + .align ALIGNARG(2); \ + C_LABEL(__strchr_ppc) \ + cfi_startproc; \ + CALL_MCOUNT + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strchr_ppc) \ + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strchr; __GI_strchr = __strchr_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c new file mode 100644 index 0000000000..712bc1a4b8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c @@ -0,0 +1,39 @@ +/* Multiple versions of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strchr __redirect_strchr +/* Omit the strchr inline definitions because it would redefine strchr. 
*/ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strchr) __strchr_ppc attribute_hidden; +extern __typeof (strchr) __strchr_power7 attribute_hidden; +# undef strchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strchr, strchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchr_power7 + : __strchr_ppc); +weak_alias (strchr, index) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-power7.S new file mode 100644 index 0000000000..3baad50818 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-power7.S @@ -0,0 +1,39 @@ +/* Optimized strchrnul implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strchrnul_power7); \ + .type C_SYMBOL_NAME(__strchrnul_power7),@function; \ + C_LABEL(__strchrnul_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strchrnul_power7) + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c new file mode 100644 index 0000000000..c981eb67f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c @@ -0,0 +1,28 @@ +/* PowerPC32 default implementation of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define STRCHRNUL __strchrnul_ppc + +#undef weak_alias +#define weak_alias(a,b ) + +extern __typeof (strchrnul) __strchrnul_ppc attribute_hidden; + +#include <string/strchrnul.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c new file mode 100644 index 0000000000..b8f853d8e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c @@ -0,0 +1,37 @@ +/* Multiple versions of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strchrnul, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strchrnul_power7 + : __strchrnul_ppc); + +weak_alias (__strchrnul, strchrnul) +#else +#include <string/strchrnul.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-power7.S new file mode 100644 index 0000000000..7681b827d6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-power7.S @@ -0,0 +1,36 @@ +/* Optimized strlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strlen_power7); \ + .type C_SYMBOL_NAME(__strlen_power7),@function; \ + C_LABEL(__strlen_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strlen_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S new file mode 100644 index 0000000000..b665977e17 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S @@ -0,0 +1,41 @@ +/* Default strlen implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined SHARED && IS_IN (libc) + +#include <sysdep.h> + +# undef ENTRY +# define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strlen_ppc); \ + .type C_SYMBOL_NAME(__strlen_ppc),@function; \ + C_LABEL(__strlen_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strlen_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strlen; __GI_strlen = __strlen_ppc + +#endif + +#include <sysdeps/powerpc/powerpc32/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c new file mode 100644 index 0000000000..c13940e999 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c @@ -0,0 +1,33 @@ +/* Multiple versions of strlen. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define strlen __redirect_strlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +# undef strlen + +libc_ifunc_redirected (__redirect_strlen, strlen, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strlen_power7 + : __strlen_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c new file mode 100644 index 0000000000..a49bed9278 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase-power7.c @@ -0,0 +1,26 @@ +/* Optimized strcasecmp_l implememtation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#include <string.h> + +#define __strncasecmp __strncasecmp_power7 + +extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c new file mode 100644 index 0000000000..089faa9853 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c @@ -0,0 +1,41 @@ +/* Multiple versions of strncasecmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strncasecmp __strncasecmp_ppc +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; +#endif + +#include <string/strncase.c> +#undef strncasecmp + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__strncasecmp) __libc_strncasecmp; +libc_ifunc (__libc_strncasecmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_power7 + : __strncasecmp_ppc); +weak_alias (__libc_strncasecmp, strncasecmp) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c new file mode 100644 index 0000000000..80f7d48133 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l-power7.c @@ -0,0 +1,26 @@ +/* Optimized strcasecmp_l implememtation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define __strncasecmp_l __strncasecmp_l_power7 +#define USE_IN_EXTENDED_LOCALE_MODEL 1 + +extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c new file mode 100644 index 0000000000..c988c8dd3f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncasecmp_l. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strncasecmp_l __strncasecmp_l_ppc +extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden; +extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strncase_l.c> +#undef strncasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__strncasecmp_l) __libc_strncasecmp_l; +libc_ifunc (__libc_strncasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_l_power7 + : __strncasecmp_l_ppc); + +weak_alias (__libc_strncasecmp_l, strncasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-power7.S new file mode 100644 index 0000000000..cbe969a5a4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-power7.S @@ -0,0 +1,38 @@ +/* Optimized strcmp implementation for POWER7/PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef EALIGN +#define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__strncmp_power7); \ + .type C_SYMBOL_NAME(__strncmp_power7),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__strncmp_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strncmp_power7) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S new file mode 100644 index 0000000000..2f5d2d3651 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S @@ -0,0 +1,40 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# undef EALIGN +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(__strncmp_ppc); \ + .type C_SYMBOL_NAME(__strncmp_ppc),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(__strncmp_ppc) \ + cfi_startproc; + +# undef END +# define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strncmp_ppc) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strncmp; __GI_strncmp = __strncmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc32/power4/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c new file mode 100644 index 0000000000..bb4e892df8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c @@ -0,0 +1,39 @@ +/* Multiple versions of strncmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strncmp __redirect_strncmp +/* Omit the strncmp inline definitions because it would redefine strncmp. 
*/ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncmp) __strncmp_ppc attribute_hidden; +extern __typeof (strncmp) __strncmp_power4 attribute_hidden; +extern __typeof (strncmp) __strncmp_power7 attribute_hidden; +# undef strncmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncmp, strncmp, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncmp_power7 + : __strncmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-power7.S new file mode 100644 index 0000000000..3f5a32f0c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-power7.S @@ -0,0 +1,40 @@ +/* Optimized strnlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(__strnlen_power7); \ + .type C_SYMBOL_NAME(__strnlen_power7),@function; \ + C_LABEL(__strnlen_power7) \ + cfi_startproc; + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(__strnlen_power7) + +#undef libc_hidden_def +#define libc_hidden_def(name) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc32/power7/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c new file mode 100644 index 0000000000..b41fc9d359 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c @@ -0,0 +1,28 @@ +/* Default strnlen implementation for PowerPC32. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define STRNLEN __strnlen_ppc +#ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__strnlen_ppc, __GI_strnlen, __strnlen_ppc); \ + strong_alias (__strnlen_ppc, __strnlen_ppc_1); \ + __hidden_ver1 (__strnlen_ppc_1, __GI___strnlen, __strnlen_ppc_1); +#endif + +#include <string/strnlen.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c new file mode 100644 index 0000000000..f2883e69eb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c @@ -0,0 +1,36 @@ +/* Multiple versions of strnlen. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strnlen __redirect_strnlen +# define __strnlen __redirect___strnlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strnlen) __strnlen_ppc attribute_hidden; +extern __typeof (__strnlen) __strnlen_power7 attribute_hidden; +# undef strnlen +# undef __strnlen + +libc_ifunc_redirected (__redirect___strnlen, __strnlen, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strnlen_power7 + : __strnlen_ppc); +weak_alias (__strnlen, strnlen) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c new file mode 100644 index 0000000000..6610b5ef82 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c @@ -0,0 +1,26 @@ +/* wcschr.c - Wide Character Search for powerpc32/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#define WCSCHR __wcschr_power6 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +#include <sysdeps/powerpc/power6/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c new file mode 100644 index 0000000000..7e22c441ac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c @@ -0,0 +1,26 @@ +/* wcschr.c - Wide Character Search for powerpc32/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#define WCSCHR __wcschr_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +#include <sysdeps/powerpc/power6/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c new file mode 100644 index 0000000000..777ec080b2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c @@ -0,0 +1,43 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <wchar.h> + +#if IS_IN (libc) +# undef libc_hidden_weak +# define libc_hidden_weak(name) + +# undef weak_alias +# undef libc_hidden_def + +# ifdef SHARED +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcschr_ppc, __GI_wcschr, __wcschr_ppc); \ + strong_alias (__wcschr_ppc, __wcschr_ppc_1); \ + __hidden_ver1 (__wcschr_ppc_1, __GI___wcschr, __wcschr_ppc_1); +# define weak_alias(name,alias) +# else +# define weak_alias(name, alias) \ + _weak_alias(__wcschr_ppc, __wcschr) +# define libc_hidden_def(name) +# endif /* SHARED */ +#endif + +extern __typeof (wcschr) __wcschr_ppc; + +#define WCSCHR __wcschr_ppc +#include <wcsmbs/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c new file mode 100644 index 0000000000..059665f1b1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c @@ -0,0 +1,41 @@ +/* Multiple versions of wcschr + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# define wcschr __redirect_wcschr +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_wcschr) __wcschr_ppc attribute_hidden; +extern __typeof (__redirect_wcschr) __wcschr_power6 attribute_hidden; +extern __typeof (__redirect_wcschr) __wcschr_power7 attribute_hidden; + +extern __typeof (__redirect_wcschr) __libc_wcschr; + +libc_ifunc (__libc_wcschr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcschr_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcschr_power6 + : __wcschr_ppc); +#undef wcschr +weak_alias (__libc_wcschr, wcschr) +#else +#include <wcsmbs/wcschr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c new file mode 100644 index 0000000000..8e732fc80c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <wchar.h> + +#define WCSCPY __wcscpy_power6 + +#include <sysdeps/powerpc/power6/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c new file mode 100644 index 0000000000..dece1024f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#define WCSCPY __wcscpy_power7 + +#include <sysdeps/powerpc/power6/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c new file mode 100644 index 0000000000..b48ff54d92 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c @@ -0,0 +1,26 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <wchar.h> + +#if IS_IN (libc) +# define WCSCPY __wcscpy_ppc +#endif + +extern __typeof (wcscpy) __wcscpy_ppc; + +#include <wcsmbs/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c new file mode 100644 index 0000000000..a59e794f03 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcscpy + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; + +libc_ifunc (wcscpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__wcscpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcscpy_power6 + : __wcscpy_ppc); +#else +#include <wcsmbs/wcscpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c new file mode 100644 index 0000000000..0391e12442 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c @@ -0,0 +1,20 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define WCSRCHR __wcsrchr_power6 + +#include <sysdeps/powerpc/power6/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c new file mode 100644 index 0000000000..1167a75734 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c @@ -0,0 +1,20 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define WCSRCHR __wcsrchr_power7 + +#include <sysdeps/powerpc/power6/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c new file mode 100644 index 0000000000..1c8e12eefb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c @@ -0,0 +1,26 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <wchar.h> + +#if IS_IN (libc) +# define WCSRCHR __wcsrchr_ppc +#endif + +extern __typeof (wcsrchr) __wcsrchr_ppc; + +#include <wcsmbs/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c new file mode 100644 index 0000000000..10820443b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcsrchr + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcsrchr) __wcsrchr_ppc attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power6 attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power7 attribute_hidden; + +libc_ifunc (wcsrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcsrchr_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? 
__wcsrchr_power6 + : __wcsrchr_ppc); +#else +#include <wcsmbs/wcsrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-power7.c new file mode 100644 index 0000000000..d2095d85ac --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-power7.c @@ -0,0 +1,23 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned_power7 +#define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned_power7 +#define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned_power7 +#define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned_power7 + +#include <sysdeps/powerpc/power6/wordcopy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c new file mode 100644 index 0000000000..ecdc2fa73d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c @@ -0,0 +1,27 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned_ppc +# define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned_ppc +# define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned_ppc +# define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned_ppc + +# include <sysdeps/powerpc/power4/wordcopy.c> +#else +# include <string/wordcopy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power4/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/strncmp.S new file mode 100644 index 0000000000..42a67e7e8a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power4/strncmp.S @@ -0,0 +1,196 @@ +/* Optimized strcmp implementation for PowerPC32. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +EALIGN (strncmp, 4, 0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrlwi. rTMP, rTMP, 30 + cmplwi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are word aligned so set up for two loops. first a word + loop, then fall into the byte loop if any residual. */ + srwi. rTMP, rN, 2 + clrlwi rN, rN, 30 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmplwi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) + b L(g1) + +L(g0): + lwzu rWORD1, 4(rSTR1) + bne- cr1, L(different) + lwzu rWORD2, 4(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + slwi rTMP, rTMP, 1 + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. 
*/ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF + cntlzw rNEG, rNEG + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + bgelr+ cr1 +L(equal): + li rRTN, 0 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + bgelr+ +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 4 + bne- cr1, L(different) + addi rSTR2, rSTR2, 4 + cmplwi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + ble cr1, L(ux) +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + .align 4 +L(u1): + cmpwi cr1, rWORD1, 0 + bdz L(u4) + cmpw rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpwi cr1, rWORD3, 0 + bdz L(u3) + cmpw rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpwi cr1, rWORD1, 0 + bdz L(u4) + cmpw rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpwi cr1, rWORD3, 0 + bdz L(u3) + cmpw rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + b L(u1) + +L(u3): sub rRTN, rWORD3, rWORD4 + blr +L(u4): sub rRTN, rWORD1, rWORD2 + blr +L(ux): + li rRTN, 0 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/Implies new file mode 100644 index 0000000000..02d222d22a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/Implies @@ -0,0 +1,4 @@ +powerpc/power5+/fpu +powerpc/power5+ +powerpc/powerpc32/power5/fpu +powerpc/powerpc32/power5 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/multiarch/Implies new file mode 100644 index 0000000000..76a985188e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S new file mode 100644 index 0000000000..efe7be8242 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceil.S @@ -0,0 +1,36 @@ 
+/* ceil function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__ceil, 4, 0) + frip fp1, fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S new file mode 100644 index 0000000000..cff058e7ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_ceilf.S @@ -0,0 +1,29 @@ +/* ceilf function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__ceilf, 4, 0) + frip fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S new file mode 100644 index 0000000000..9f040d8457 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S @@ -0,0 +1,36 @@ +/* floor function. PowerPC32/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_floor.S ==== */
/* floor function.  PowerPC32/power5+ version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* double [fp1] floor (double x [fp1])
   Single "frim" (Floating Round to Integer Minus, ISA V2.02)
   instruction rounds toward -infinity in hardware.  */
	.machine "power5"
EALIGN (__floor, 4, 0)
	frim	fp1, fp1
	blr
	END (__floor)

weak_alias (__floor, floor)

#ifdef NO_LONG_DOUBLE
weak_alias (__floor, floorl)
strong_alias (__floor, __floorl)
#endif
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
compat_symbol (libm, __floor, floorl, GLIBC_2_0)
#endif

/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_floorf.S ==== */
/* floorf function.  PowerPC32/power5+ version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* float [fp1] floorf (float x [fp1])  */
	.machine "power5"
EALIGN (__floorf, 4, 0)
	frim	fp1, fp1	/* The rounding instructions are double.  */
	frsp	fp1, fp1	/* But we need to set overflow for float.  */
	blr
	END (__floorf)

weak_alias (__floorf, floorf)
/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S ==== */
/* llround function.  POWER5+, PowerPC32 version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* long long [r3, r4] llround (double x [fp1])
   IEEE 1003.1 llround function.  IEEE specifies "round to the nearest
   integer value, rounding halfway cases away from zero, regardless of
   the current rounding mode."  However PowerPC Architecture defines
   "round to Nearest" as "Choose the best approximation.  In case of a
   tie, choose the one that is even (least significant bit 0).".
   So we pre-round using the V2.02 Floating Round to Integer Nearest
   instruction before we use the Floating Convert to Integer Doubleword
   with round toward zero instruction.  */

	.machine "power5"
ENTRY (__llround)
	stwu	r1,-16(r1)
	cfi_adjust_cfa_offset (16)
	frin	fp2,fp1		/* Pre-round to nearest (ties away handled
				   by the frin/fctidz pair).  */
	fctidz	fp3,fp2		/* Convert to Integer Doubleword, round
				   toward 0.  */
	stfd	fp3,8(r1)
	nop			/* Ensure the following load is in a different dispatch  */
	nop			/* group to avoid pipe stall on POWER4&5.  */
	nop
	lwz	r3,8+HIWORD(r1)	/* 64-bit result returned in r3 (high) ...  */
	lwz	r4,8+LOWORD(r1)	/* ... and r4 (low).  */
	addi	r1,r1,16
	blr
	END (__llround)

weak_alias (__llround, llround)

strong_alias (__llround, __llroundf)
weak_alias (__llround, llroundf)

#ifdef NO_LONG_DOUBLE
weak_alias (__llround, llroundl)
strong_alias (__llround, __llroundl)
#endif
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
#endif

/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S ==== */
/* __llroundf is in s_llround.S  */

/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S ==== */
/* lround function.  POWER5+, PowerPC32 version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* long [r3] lround (double x [fp1])
   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
   integer value, rounding halfway cases away from zero, regardless of
   the current rounding mode."  However PowerPC Architecture defines
   "round to Nearest" as "Choose the best approximation.  In case of a
   tie, choose the one that is even (least significant bit 0).".
   So we pre-round using the V2.02 Floating Round to Integer Nearest
   instruction before we use the Floating Convert to Integer Word with
   round toward zero instruction.  */

	.machine "power5"
ENTRY (__lround)
	stwu	r1,-16(r1)
	cfi_adjust_cfa_offset (16)
	frin	fp2,fp1
	fctiwz	fp3,fp2		/* Convert to Integer Word, round toward 0.  */
	stfd	fp3,8(r1)
	nop			/* Ensure the following load is in a different dispatch  */
	nop			/* group to avoid pipe stall on POWER4&5.  */
	nop
	lwz	r3,8+LOWORD(r1)	/* 32-bit long result is the low word.  */
	addi	r1,r1,16
	blr
	END (__lround)

weak_alias (__lround, lround)

strong_alias (__lround, __lroundf)
weak_alias (__lround, lroundf)

#ifdef NO_LONG_DOUBLE
weak_alias (__lround, lroundl)
strong_alias (__lround, __lroundl)
#endif
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
#endif
/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_round.S ==== */
/* round function.  PowerPC32/power5+ version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* double [fp1] round (double x [fp1])
   Single "frin" (Floating Round to Integer Nearest, ISA V2.02)
   instruction does the rounding in hardware.  */
	.machine "power5"
EALIGN (__round, 4, 0)
	frin	fp1, fp1
	blr
	END (__round)

weak_alias (__round, round)

#ifdef NO_LONG_DOUBLE
weak_alias (__round, roundl)
strong_alias (__round, __roundl)
#endif
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __round, roundl, GLIBC_2_1)
#endif

/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_roundf.S ==== */
/* roundf function.  PowerPC32/power5+ version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* float [fp1] roundf (float x [fp1])  */
	.machine "power5"
EALIGN (__roundf, 4, 0)
	frin	fp1, fp1	/* The rounding instructions are double.  */
	frsp	fp1, fp1	/* But we need to set overflow for float.  */
	blr
	END (__roundf)

weak_alias (__roundf, roundf)
/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_trunc.S ==== */
/* trunc function.  PowerPC32/power5+ version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* double [fp1] trunc (double x [fp1])
   Single "friz" (Floating Round to Integer toward Zero, ISA V2.02)
   instruction does the truncation in hardware.  */
	.machine "power5"
EALIGN (__trunc, 4, 0)
	friz	fp1, fp1
	blr
	END (__trunc)

weak_alias (__trunc, trunc)

#ifdef NO_LONG_DOUBLE
weak_alias (__trunc, truncl)
strong_alias (__trunc, __truncl)
#endif
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __trunc, truncl, GLIBC_2_1)
#endif

/* ==== sysdeps/powerpc/powerpc32/power5+/fpu/s_truncf.S ==== */
/* truncf function.  PowerPC32/power5+ version.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* float [fp1] truncf (float x [fp1])  */
	.machine "power5"
EALIGN (__truncf, 4, 0)
	friz	fp1, fp1	/* The rounding instructions are double.  */
	frsp	fp1, fp1	/* But we need to set overflow for float.  */
	blr
	END (__truncf)

weak_alias (__truncf, truncf)

/* ==== sysdeps/powerpc/powerpc32/power5+/multiarch/Implies ==== */
powerpc/powerpc32/power5/multiarch

/* ==== sysdeps/powerpc/powerpc32/power5/Implies ==== */
powerpc/powerpc32/power4/fpu
powerpc/powerpc32/power4

/* ==== sysdeps/powerpc/powerpc32/power5/fpu/multiarch/Implies ==== */
powerpc/powerpc32/power4/fpu/multiarch
/* ==== sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S ==== */
/* isnan().  PowerPC32 version.
   Copyright (C) 2008-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* int __isnan(x)
   Moves the double through storage to examine its bit pattern with
   integer compares; NaN iff the magnitude exceeds the infinity
   encoding 0x7ff00000_00000000.  */
	.machine power5
EALIGN (__isnan, 4, 0)
	stwu	r1,-32(r1)
	cfi_adjust_cfa_offset (32)
	ori	r1,r1,0
	stfd	fp1,24(r1)	/* copy FPR to GPR  */
	ori	r1,r1,0
	lwz	r4,24+HIWORD(r1)
	lwz	r5,24+LOWORD(r1)
	lis	r0,0x7ff0	/* const long r0 0x7ff00000 00000000  */
	clrlwi	r4,r4,1		/* x = fabs(x)  */
	cmpw	cr7,r4,r0	/* if (fabs(x) <= inf)  */
	cmpwi	cr6,r5,0
	li	r3,0		/* then return 0  */
	addi	r1,r1,32
	cfi_adjust_cfa_offset (-32)
	bltlr+	cr7
	bgt-	cr7,L(NaN)
	beqlr+	cr6
L(NaN):
	li	r3,1		/* else return 1  */
	blr
	END (__isnan)

hidden_def (__isnan)
weak_alias (__isnan, isnan)

#ifdef NO_LONG_DOUBLE
strong_alias (__isnan, __isnanl)
weak_alias (__isnan, isnanl)
#endif

#if !IS_IN (libm)
# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
# endif
#endif

/* ==== sysdeps/powerpc/powerpc32/power5/fpu/s_isnanf.S ==== */
/* isnanf().  PowerPC32 version.
   Copyright (C) 2008-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* int __isnanf(x)
   Single-word variant: NaN iff magnitude exceeds 0x7f800000.  */
	.machine power5
EALIGN (__isnanf, 4, 0)
	stwu	r1,-32(r1)
	cfi_adjust_cfa_offset (32)
	stfs	fp1,28(r1)	/* copy FPR to GPR  */
	nop
	nop
	lwz	r4,28(r1)
	lis	r0,0x7f80	/* const long r0 0x7f800000  */
	clrlwi	r4,r4,1		/* x = fabs(x)  */
	cmpw	cr7,r4,r0	/* if (fabs(x) <= inf)  */
	li	r3,0		/* then return 0  */
	addi	r1,r1,32
	cfi_adjust_cfa_offset (-32)
	blelr+	cr7
L(NaN):
	li	r3,1		/* else return 1  */
	blr
	END (__isnanf)

hidden_def (__isnanf)
weak_alias (__isnanf, isnanf)
/* ==== sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt_compat.S ==== */
/* sqrt function.  PowerPC32 version.
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* double [fp1] sqrt (double x [fp1])
   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
   The fsqrt instruction generates the correct value for all inputs and
   sets the appropriate floating point exceptions.  Extended checking is
   only needed to set errno (via __kernel_standard) if the input value
   is negative.

   So compare the input value against the absolute value of itself.
   This will compare equal unless the value is negative (EDOM) or a NAN,
   in which case we branch to the extend wrapper.  If equal we can return
   the result directly.

   This part of the function looks like a leaf routine, so no need to
   stack a frame or execute prologue/epilogue code.  It is safe to
   branch directly to w_sqrt as long as the input value (f1) is
   preserved.  Putting the sqrt result into f2 (float parameter 2)
   allows passing both the input value and sqrt result into the extended
   wrapper so there is no need to recompute.

   This tactic avoids the overhead of stacking a frame for the normal
   (non-error) case.  Until gcc supports prologue shrink-wrapping
   this is the best we can do.  */

	.section ".text"
	.machine power4
EALIGN (__sqrt, 5, 0)
	fabs	fp0,fp1
	fsqrt	fp2,fp1
	fcmpu	cr1,fp0,fp1
	bne-	cr1,.Lw_sqrt	/* Negative or NaN: take the slow path.  */
	fmr	fp1,fp2
	blr
	.align 4
.Lw_sqrt:
	mflr	r0
	stwu	r1,-16(r1)
	cfi_adjust_cfa_offset(16)
	fmr	fp12,fp2
	stw	r0,20(r1)
	stw	r30,8(r1)
	cfi_offset(lr,20-16)
	cfi_offset(r30,8-16)
#ifdef SHARED
	SETUP_GOT_ACCESS(r30,got_label)
	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
	lwz	r9,_LIB_VERSION@got(30)
	lwz	r0,0(r9)
#else
	lis	r9,_LIB_VERSION@ha
	lwz	r0,_LIB_VERSION@l(r9)
#endif
/* if (_LIB_VERSION == _IEEE_) return z;  */
	cmpwi	cr7,r0,-1
	beq-	cr7,.L4
/* if (x != x) return z;  !isnan  */
	fcmpu	cr7,fp1,fp1
	bne-	cr7,.L4
/* if (x < 0.0)
     return __kernel_standard (x, x, 26)  */
	fmr	fp2,fp1
	li	r3,26
	bne-	cr1,.L11
.L4:
	lwz	r0,20(r1)
	fmr	fp1,fp12
	lwz	r30,8(r1)
	addi	r1,r1,16
	mtlr	r0
	blr
.L11:
	bl	__kernel_standard@plt
	fmr	fp12,fp1
	b	.L4
	END (__sqrt)

weak_alias (__sqrt, sqrt)

#ifdef NO_LONG_DOUBLE
weak_alias (__sqrt, sqrtl)
strong_alias (__sqrt, __sqrtl)
#endif
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0)
#endif
/* ==== sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf_compat.S ==== */
/* sqrtf function.  PowerPC32 version.
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* float [fp1] sqrtf (float x [fp1])
   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
   The fsqrts instruction generates the correct value for all inputs and
   sets the appropriate floating point exceptions.  Extended checking is
   only needed to set errno (via __kernel_standard) if the input value
   is negative.

   So compare the input value against the absolute value of itself.
   This will compare equal unless the value is negative (EDOM) or a NAN,
   in which case we branch to the extend wrapper.  If equal we can return
   the result directly.

   This part of the function looks like a leaf routine, so no need to
   stack a frame or execute prologue/epilogue code.  It is safe to
   branch directly to w_sqrt as long as the input value (f1) is
   preserved.  Putting the sqrt result into f2 (float parameter 2)
   allows passing both the input value and sqrt result into the extended
   wrapper so there is no need to recompute.

   This tactic avoids the overhead of stacking a frame for the normal
   (non-error) case.  Until gcc supports prologue shrink-wrapping
   this is the best we can do.  */

	.section ".text"
	.machine power4
EALIGN (__sqrtf, 5, 0)
	fabs	fp0,fp1
	fsqrts	fp2,fp1
	fcmpu	cr1,fp0,fp1
	bne-	cr1,.Lw_sqrtf	/* Negative or NaN: take the slow path.  */
	fmr	fp1,fp2
	blr
	.align 4
.Lw_sqrtf:
	mflr	r0
	stwu	r1,-16(r1)
	cfi_adjust_cfa_offset(16)
	fmr	fp12,fp2
	stw	r0,20(r1)
	stw	r30,8(r1)
	cfi_offset(lr,20-16)
	cfi_offset(r30,8-16)
#ifdef SHARED
	SETUP_GOT_ACCESS(r30,got_label)
	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
	lwz	r9,_LIB_VERSION@got(30)
	lwz	r0,0(r9)
#else
	lis	r9,_LIB_VERSION@ha
	lwz	r0,_LIB_VERSION@l(r9)
#endif
/* if (_LIB_VERSION == _IEEE_) return z;  */
	cmpwi	cr7,r0,-1
	beq-	cr7,.L4
/* if (x != x) return z;  !isnan  */
	fcmpu	cr7,fp1,fp1
	bne-	cr7,.L4
/* if (x < 0.0)
     return __kernel_standard (x, x, 126)  */
	fmr	fp2,fp1
	li	r3,126
	bne-	cr1,.L11
.L4:
	lwz	r0,20(r1)
	fmr	fp1,fp12
	lwz	r30,8(r1)
	addi	r1,r1,16
	mtlr	r0
	blr
.L11:
	bl	__kernel_standard@plt
	fmr	fp12,fp1
	b	.L4
	END (__sqrtf)

weak_alias (__sqrtf, sqrtf)

/* ==== sysdeps/powerpc/powerpc32/power5/multiarch/Implies ==== */
powerpc/powerpc32/power4/multiarch

/* ==== sysdeps/powerpc/powerpc32/power6/Implies ==== */
powerpc/powerpc32/power5+/fpu
powerpc/powerpc32/power5+
/* ==== sysdeps/powerpc/powerpc32/power6/fpu/multiarch/Implies ==== */
powerpc/powerpc32/power5+/fpu/multiarch

/* ==== sysdeps/powerpc/powerpc32/power6/fpu/s_copysign.S ==== */
/* copysign().  PowerPC32/POWER6 version.
   Copyright (C) 2010-2017 Free Software Foundation, Inc.
   Contributed by Luis Machado <luisgpm@br.ibm.com>.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* double [f1] copysign (double [f1] x, double [f2] y);
   copysign(x,y) returns a value with the magnitude of x and
   with the sign bit of y.  Single "fcpsgn" (Floating Copy Sign,
   POWER6) instruction does this in hardware.  */

	.section ".text"
	.type	__copysign, @function
	.machine power6
EALIGN (__copysign, 4, 0)
	CALL_MCOUNT
	fcpsgn	fp1,fp2,fp1
	blr
END (__copysign)

hidden_def (__copysign)
weak_alias (__copysign, copysign)

/* It turns out that the 'double' version will also always work for
   single-precision.  */
strong_alias (__copysign, __copysignf)
hidden_def (__copysignf)
weak_alias (__copysignf, copysignf)

#ifdef NO_LONG_DOUBLE
strong_alias (__copysign, __copysignl)
weak_alias (__copysign, copysignl)
#endif

#if IS_IN (libm)
# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
compat_symbol (libm, copysign, copysignl, GLIBC_2_0)
# endif
#else
# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
compat_symbol (libc, copysign, copysignl, GLIBC_2_0);
# endif
#endif

/* ==== sysdeps/powerpc/powerpc32/power6/fpu/s_copysignf.S ==== */
/* This function uses the same code as s_copysign.S.  */
/* ==== sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S ==== */
/* isnan().  PowerPC32 version.
   Copyright (C) 2008-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* int __isnan(x)
   Moves the double through storage and tests the bit pattern with
   integer compares; NaN iff the magnitude exceeds the infinity
   encoding 0x7ff00000_00000000.  */
	.machine power6
EALIGN (__isnan, 4, 0)
	stwu	r1,-32(r1)
	cfi_adjust_cfa_offset (32)
	ori	r1,r1,0
	stfd	fp1,24(r1)	/* copy FPR to GPR  */
	ori	r1,r1,0
	lwz	r4,24+HIWORD(r1)
	lwz	r5,24+LOWORD(r1)
	lis	r0,0x7ff0	/* const long r0 0x7ff00000 00000000  */
	clrlwi	r4,r4,1		/* x = fabs(x)  */
	cmpw	cr7,r4,r0	/* if (fabs(x) <= inf)  */
	cmpwi	cr6,r5,0
	li	r3,0		/* then return 0  */
	addi	r1,r1,32
	cfi_adjust_cfa_offset (-32)
	bltlr+	cr7
	bgt-	cr7,L(NaN)
	beqlr+	cr6
L(NaN):
	li	r3,1		/* else return 1  */
	blr
	END (__isnan)

hidden_def (__isnan)
weak_alias (__isnan, isnan)

#ifdef NO_LONG_DOUBLE
strong_alias (__isnan, __isnanl)
weak_alias (__isnan, isnanl)
#endif

#if !IS_IN (libm)
# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
# endif
#endif

/* ==== sysdeps/powerpc/powerpc32/power6/fpu/s_isnanf.S ==== */
/* isnanf().  PowerPC32 version.
   Copyright (C) 2008-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

/* int __isnanf(x)
   Single-word variant: NaN iff magnitude exceeds 0x7f800000.  */
	.machine power6
EALIGN (__isnanf, 4, 0)
	stwu	r1,-32(r1)
	cfi_adjust_cfa_offset (32)
	ori	r1,r1,0
	stfs	fp1,24(r1)	/* copy FPR to GPR  */
	ori	r1,r1,0
	lwz	r4,24(r1)
	lis	r0,0x7f80	/* const long r0 0x7f800000  */
	clrlwi	r4,r4,1		/* x = fabs(x)  */
	cmpw	cr7,r4,r0	/* if (fabs(x) <= inf)  */
	li	r3,0		/* then return 0  */
	addi	r1,r1,32
	cfi_adjust_cfa_offset (-32)
	blelr+	cr7
L(NaN):
	li	r3,1		/* else return 1  */
	blr
	END (__isnanf)

hidden_def (__isnanf)
weak_alias (__isnanf, isnanf)
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long int[r3, r4] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) +/* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrint) + +weak_alias (__llrint, llrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S new file mode 100644 index 0000000000..0950e7e7c7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S @@ -0,0 +1,38 @@ +/* Round float to long int. PowerPC32 on PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* long long int[r3, r4] __llrintf (float x[fp1]) */ +ENTRY (__llrintf) + CALL_MCOUNT + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + fctid fp13,fp1 + stfd fp13,8(r1) +/* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llrintf) + +weak_alias (__llrintf, llrintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S new file mode 100644 index 0000000000..83ba999a39 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S @@ -0,0 +1,59 @@ +/* lround function. POWER5+, PowerPC32 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long [r3] llround (float x [fp1]) + IEEE 1003.1 lround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". 
+ So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use the Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power5" +ENTRY (__llround) + stwu r1,-16(r1) + cfi_adjust_cfa_offset (16) + frin fp2,fp1 + fctidz fp3,fp2 /* Convert To Integer Word lround toward 0. */ + stfd fp3,8(r1) +/* Insure the following load is in a different dispatch group by + inserting "group ending nop". */ + ori r1,r1,0 + lwz r3,8+HIWORD(r1) + lwz r4,8+LOWORD(r1) + addi r1,r1,16 + blr + END (__llround) + +weak_alias (__llround, llround) + +strong_alias (__llround, __llroundf) +weak_alias (__llround, llroundf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S new file mode 100644 index 0000000000..030d2fdff8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S @@ -0,0 +1 @@ +/* __llroundf is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memcpy.S new file mode 100644 index 0000000000..81b62cba21 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memcpy.S @@ -0,0 +1,907 @@ +/* Optimized memcpy implementation for PowerPC32 on POWER6. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination word (4-byte) aligned. Further optimization is + possible when both source and destination are word aligned. + Each case has an optimized unrolled loop. */ + + .machine power6 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + cmplwi cr1,5,31 /* check for short move. */ + neg 0,3 + cmplwi cr1,5,31 + clrlwi 10,4,30 /* check alignment of src. */ + andi. 11,3,3 /* check alignment of dst. */ + clrlwi 0,0,30 /* Number of bytes until the 1st word of dst. */ + ble- cr1,L(word_unaligned_short) /* If move < 32 bytes. */ + cmplw cr6,10,11 + stw 31,24(1) + stw 30,20(1) + cfi_offset(31,(24-32)) + cfi_offset(30,(20-32)) + mr 30,3 + beq .L0 + mtcrf 0x01,0 + subf 31,0,5 /* Length after alignment. */ + add 12,4,0 /* Compute src addr after alignment. */ + /* Move 0-3 bytes as needed to get the destination word aligned. */ +1: bf 31,2f + lbz 6,0(4) + bf 30,3f + lhz 7,1(4) + stb 6,0(3) + sth 7,1(3) + addi 3,3,3 + b 0f +3: + stb 6,0(3) + addi 3,3,1 + b 0f +2: bf 30,0f + lhz 6,0(4) + sth 6,0(3) + addi 3,3,2 +0: + clrlwi 10,12,30 /* check alignment of src again. 
*/ + srwi 9,31,2 /* Number of full words remaining. */ + bne- cr6,L(wdu) /* If source is not word aligned. .L6 */ + clrlwi 11,31,30 /* calculate the number of tail bytes */ + b L(word_aligned) + /* Copy words from source to destination, assuming the destination is + aligned on a word boundary. + + At this point we know there are at least 29 bytes left (32-3) to copy. + The next step is to determine if the source is also word aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are word aligned, and we can use + the optimized word copy loop. */ + .align 4 +.L0: + mr 31,5 + mr 12,4 + bne- cr6,L(wdu) /* If source is not word aligned. .L6 */ + srwi 9,5,2 /* Number of full words remaining. */ + clrlwi 11,5,30 /* calculate the number of tail bytes */ + + /* Move words where destination and source are word aligned. + Use an unrolled loop to copy 4 words (16-bytes) per iteration. + If the copy is not an exact multiple of 16 bytes, 1-3 + words are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-3 bytes. These bytes are + copied a halfword/byte at a time as needed to preserve alignment. 
*/ +L(word_aligned): + mtcrf 0x01,9 + srwi 8,31,4 /* calculate the 16 byte loop count */ + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + bf 30,1f + lwz 6,0(12) + lwz 7,4(12) + addi 11,12,8 + mtctr 8 + stw 6,0(3) + stw 7,4(3) + addi 10,3,8 + bf 31,4f + lwz 0,8(12) + stw 0,8(3) + blt cr1,3f + addi 11,12,12 + addi 10,3,12 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + lwz 6,0(12) + addi 11,12,4 + stw 6,0(3) + addi 10,3,4 + + .align 4 +4: + lwz 6,0(11) + lwz 7,4(11) + lwz 8,8(11) + lwz 0,12(11) + stw 6,0(10) + stw 7,4(10) + stw 8,8(10) + stw 0,12(10) + addi 11,11,16 + addi 10,10,16 + bdnz 4b +3: + clrrwi 0,31,2 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-3 bytes and we know that the + destination is word aligned. */ +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 128-byte, + and 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 128-byte + boundaries. Since only loads are sensitive to the 32-/128-byte + boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to insure that all loads are aligned. 
+ While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small (~20 cycle) delay. */ + .align 4 + + cfi_same_value (31) + cfi_same_value (30) +L(word_unaligned_short): + mtcrf 0x01,5 + cmplwi cr6,5,8 + neg 8,4 + clrrwi 9,4,2 + andi. 0,8,3 + beq cr6,L(wus_8) /* Handle moves of 8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmplwi cr1,5,16 + mr 12,4 + ble cr6,L(wus_4) /* Handle moves of 0-8 bytes. */ + mr 11,3 + mr 10,5 + cmplwi cr6,0,2 + beq L(wus_tail) /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(9) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srwi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmplwi cr1,10,16 + add 11,3,0 + mtcrf 0x01,10 + .align 4 +L(wus_tail): +/* At least 6 bytes left and the source is word aligned. This allows + some speculative loads up front. */ +/* We need to special case the fall-through because the biggest delays + are due to address computation not being ready in time for the + AGEN. */ + lwz 6,0(12) + lwz 7,4(12) + blt cr1,L(wus_tail8) + cmplwi cr0,10,24 +L(wus_tail16): /* Move 16 bytes. */ + stw 6,0(11) + stw 7,4(11) + lwz 6,8(12) + lwz 7,12(12) + stw 6,8(11) + stw 7,12(11) +/* Move 8 bytes more. */ + bf 28,L(wus_tail16p8) + cmplwi cr1,10,28 + lwz 6,16(12) + lwz 7,20(12) + stw 6,16(11) + stw 7,20(11) +/* Move 4 bytes more. */ + bf 29,L(wus_tail16p4) + lwz 6,24(12) + stw 6,24(11) + addi 12,12,28 + addi 11,11,28 + bgt cr1,L(wus_tail2) + /* exactly 28 bytes. Return original dst pointer and exit. 
*/ + addi 1,1,32 + blr + .align 4 +L(wus_tail16p8): /* less than 8 bytes left. */ + beq cr1,L(wus_tailX) /* exactly 16 bytes, early exit. */ + cmplwi cr1,10,20 + bf 29,L(wus_tail16p2) +/* Move 4 bytes more. */ + lwz 6,16(12) + stw 6,16(11) + addi 12,12,20 + addi 11,11,20 + bgt cr1,L(wus_tail2) + /* exactly 20 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + .align 4 +L(wus_tail16p4): /* less than 4 bytes left. */ + addi 12,12,24 + addi 11,11,24 + bgt cr0,L(wus_tail2) + /* exactly 24 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + .align 4 +L(wus_tail16p2): /* 16 bytes moved, less than 4 bytes left. */ + addi 12,12,16 + addi 11,11,16 + b L(wus_tail2) + + .align 4 +L(wus_tail8): /* Move 8 bytes. */ +/* r6, r7 already loaded speculatively. */ + cmplwi cr1,10,8 + cmplwi cr0,10,12 + bf 28,L(wus_tail4) + stw 6,0(11) + stw 7,4(11) +/* Move 4 bytes more. */ + bf 29,L(wus_tail8p4) + lwz 6,8(12) + stw 6,8(11) + addi 12,12,12 + addi 11,11,12 + bgt cr0,L(wus_tail2) + /* exactly 12 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + .align 4 +L(wus_tail8p4): /* less than 4 bytes left. */ + addi 12,12,8 + addi 11,11,8 + bgt cr1,L(wus_tail2) + /* exactly 8 bytes. Return original dst pointer and exit. */ + addi 1,1,32 + blr + + .align 4 +L(wus_tail4): /* Move 4 bytes. */ +/* r6 already loaded speculatively. If we are here we know there is + more than 4 bytes left. So there is no need to test. */ + addi 12,12,4 + stw 6,0(11) + addi 11,11,4 +L(wus_tail2): /* Move 2-3 bytes. */ + bf 30,L(wus_tail1) + lhz 6,0(12) + sth 6,0(11) + bf 31,L(wus_tailX) + lbz 7,2(12) + stb 7,2(11) + addi 1,1,32 + blr +L(wus_tail1): /* Move 1 byte. */ + bf 31,L(wus_tailX) + lbz 6,0(12) + stb 6,0(11) +L(wus_tailX): + /* Return original dst pointer. */ + addi 1,1,32 + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +L(wus_8): + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. 
*/ + addi 1,1,32 + blr + .align 4 +L(wus_4): + bf 29,L(wus_2) + lwz 6,0(4) + stw 6,0(3) + bf 30,L(wus_5) + lhz 7,4(4) + sth 7,4(3) + bf 31,L(wus_0) + lbz 8,6(4) + stb 8,6(3) + addi 1,1,32 + blr + .align 4 +L(wus_5): + bf 31,L(wus_0) + lbz 6,4(4) + stb 6,4(3) + /* Return original dst pointer. */ + addi 1,1,32 + blr + .align 4 +L(wus_2): /* Move 2-3 bytes. */ + bf 30,L(wus_1) + lhz 6,0(4) + sth 6,0(3) + bf 31,L(wus_0) + lbz 7,2(4) + stb 7,2(3) + addi 1,1,32 + blr + .align 4 +L(wus_1): /* Move 1 byte. */ + bf 31,L(wus_0) + lbz 6,0(4) + stb 6,0(3) + .align 3 +L(wus_0): + /* Return original dst pointer. */ + addi 1,1,32 + blr + + .align 4 + cfi_offset(31,(24-32)) + cfi_offset(30,(20-32)) +L(wdu): + + /* Copy words where the destination is aligned but the source is + not. For power4, power5 and power6 machines there is penalty for + unaligned loads (src) that cross 32-byte, cacheline, or page + boundaries. So we want to use simple (unaligned) loads where + possible but avoid them where we know the load would span a 32-byte + boundary. + + At this point we know we have at least 29 (32-3) bytes to copy + the src is unaligned. and we may cross at least one 32-byte + boundary. Also we have the following register values: + r3 == adjusted dst, word aligned + r4 == unadjusted src + r5 == unadjusted len + r9 == adjusted Word length + r10 == src alignment (1-3) + r12 == adjusted src, not aligned + r31 == adjusted len + + First we need to copy word up to but not crossing the next 32-byte + boundary. Then perform aligned loads just before and just after + the boundary and use shifts and or to generate the next aligned + word for dst. If more than 32 bytes remain we copy (unaligned src) + the next 7 words and repeat the loop until less than 32-bytes + remain. + + Then if more than 4 bytes remain we again use aligned loads, + shifts and or to generate the next dst word. We then process the + remaining words using unaligned loads as needed. 
Finally we check + if there are more than 0 bytes (1-3) bytes remaining and use + halfword and or byte load/stores to complete the copy. +*/ + mr 4,12 /* restore unaligned adjusted src ptr */ + clrlwi 0,12,27 /* Find dist from previous 32-byte boundary. */ + slwi 10,10,3 /* calculate number of bits to shift 1st word left */ + cmplwi cr5,0,16 + subfic 8,0,32 /* Number of bytes to next 32-byte boundary. */ + + mtcrf 0x01,8 + cmplwi cr1,10,16 + subfic 9,10,32 /* number of bits to shift 2nd word right */ +/* This test is reversed because the timing to compare the bytes to + 32-byte boundary could not be meet. So we compare the bytes from + previous 32-byte boundary and invert the test. */ + bge cr5,L(wdu_h32_8) + .align 4 + lwz 6,0(4) + lwz 7,4(4) + addi 12,4,16 /* generate alternate pointers to avoid agen */ + addi 11,3,16 /* timing issues downstream. */ + stw 6,0(3) + stw 7,4(3) + subi 31,31,16 + lwz 6,8(4) + lwz 7,12(4) + addi 4,4,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 + bf 28,L(wdu_h32_4) + lwz 6,0(12) + lwz 7,4(12) + subi 31,31,8 + addi 4,4,8 + stw 6,0(11) + stw 7,4(11) + addi 3,3,8 + bf 29,L(wdu_h32_0) + lwz 6,8(12) + addi 4,4,4 + subi 31,31,4 + stw 6,8(11) + addi 3,3,4 + b L(wdu_h32_0) + .align 4 +L(wdu_h32_8): + bf 28,L(wdu_h32_4) + lwz 6,0(4) + lwz 7,4(4) + subi 31,31,8 + bf 29,L(wdu_h32_8x) + stw 6,0(3) + stw 7,4(3) + lwz 6,8(4) + addi 4,4,12 + subi 31,31,4 + stw 6,8(3) + addi 3,3,12 + b L(wdu_h32_0) + .align 4 +L(wdu_h32_8x): + addi 4,4,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 + b L(wdu_h32_0) + .align 4 +L(wdu_h32_4): + bf 29,L(wdu_h32_0) + lwz 6,0(4) + subi 31,31,4 + addi 4,4,4 + stw 6,0(3) + addi 3,3,4 + .align 4 +L(wdu_h32_0): +/* set up for 32-byte boundary crossing word move and possibly 32-byte + move loop. 
*/ + clrrwi 12,4,2 + cmplwi cr5,31,32 + bge cr1,L(wdu2_32) +#if 0 + b L(wdu1_32) +/* + cmplwi cr1,10,8 + beq cr1,L(wdu1_32) + cmplwi cr1,10,16 + beq cr1,L(wdu2_32) + cmplwi cr1,10,24 + beq cr1,L(wdu3_32) +*/ +L(wdu_32): + lwz 6,0(12) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ + slw 0,6,10 + clrlwi 31,31,27 /* The remaining bytes, < 32. */ + blt cr5,L(wdu_32tail) + mtctr 8 + cmplwi cr6,31,4 + .align 4 +L(wdu_loop32): + /* copy 32 bytes at a time */ + lwz 8,4(12) + addi 12,12,32 + lwz 7,4(4) + srw 8,8,9 + or 0,0,8 + stw 0,0(3) + stw 7,4(3) + lwz 6,8(4) + lwz 7,12(4) + stw 6,8(3) + stw 7,12(3) + lwz 6,16(4) + lwz 7,20(4) + stw 6,16(3) + stw 7,20(3) + lwz 6,24(4) + lwz 7,28(4) + lwz 8,0(12) + addi 4,4,32 + stw 6,24(3) + stw 7,28(3) + addi 3,3,32 + slw 0,8,10 + bdnz+ L(wdu_loop32) + +L(wdu_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,4(12) + srw 8,8,9 + or 6,0,8 + b L(wdu_32tailx) +#endif + .align 4 +L(wdu1_32): + lwz 6,-1(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ +#ifdef __LITTLE_ENDIAN__ + srwi 6,6,8 +#else + slwi 6,6,8 +#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. 
*/ + blt cr5,L(wdu1_32tail) + mtctr 8 + cmplwi cr6,31,4 + + lwz 8,3(4) + lwz 7,4(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,24,32 +#else +/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 +#endif + b L(wdu1_loop32x) + .align 4 +L(wdu1_loop32): + /* copy 32 bytes at a time */ + lwz 8,3(4) + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,24,32 +#else +/* Equivalent to srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 +#endif +L(wdu1_loop32x): + lwz 10,8(4) + lwz 11,12(4) + stw 6,0(3) + stw 7,4(3) + lwz 6,16(4) + lwz 7,20(4) + stw 10,8(3) + stw 11,12(3) + lwz 10,24(4) + lwz 11,28(4) + lwz 8,32-1(4) + addi 4,4,32 + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 +#ifdef __LITTLE_ENDIAN__ + srwi 6,8,8 +#else + slwi 6,8,8 +#endif + bdnz+ L(wdu1_loop32) + stw 10,-8(3) + stw 11,-4(3) + +L(wdu1_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,3(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,24,32 +#else +/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */ + rlwimi 6,8,8,(32-8),31 +#endif + b L(wdu_32tailx) + +L(wdu2_32): + bgt cr1,L(wdu3_32) + lwz 6,-2(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ +#ifdef __LITTLE_ENDIAN__ + srwi 6,6,16 +#else + slwi 6,6,16 +#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. 
*/ + blt cr5,L(wdu2_32tail) + mtctr 8 + cmplwi cr6,31,4 + + lwz 8,2(4) + lwz 7,4(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,16,32 +#else + rlwimi 6,8,16,(32-16),31 +#endif + b L(wdu2_loop32x) + .align 4 +L(wdu2_loop32): + /* copy 32 bytes at a time */ + lwz 8,2(4) + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,16,32 +#else + rlwimi 6,8,16,(32-16),31 +#endif +L(wdu2_loop32x): + lwz 10,8(4) + lwz 11,12(4) + stw 6,0(3) + stw 7,4(3) + lwz 6,16(4) + lwz 7,20(4) + stw 10,8(3) + stw 11,12(3) + lwz 10,24(4) + lwz 11,28(4) +/* lwz 8,0(12) */ + lwz 8,32-2(4) + addi 4,4,32 + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 +#ifdef __LITTLE_ENDIAN__ + srwi 6,8,16 +#else + slwi 6,8,16 +#endif + bdnz+ L(wdu2_loop32) + stw 10,-8(3) + stw 11,-4(3) + +L(wdu2_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,2(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,16,32 +#else + rlwimi 6,8,16,(32-16),31 +#endif + b L(wdu_32tailx) + +L(wdu3_32): +/* lwz 6,0(12) */ + lwz 6,-3(4) + cmplwi cr6,31,4 + srwi 8,31,5 /* calculate the 32 byte loop count */ +#ifdef __LITTLE_ENDIAN__ + srwi 6,6,24 +#else + slwi 6,6,24 +#endif + clrlwi 31,31,27 /* The remaining bytes, < 32. 
*/ + blt cr5,L(wdu3_32tail) + mtctr 8 + cmplwi cr6,31,4 + + lwz 8,1(4) + lwz 7,4(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,8,32 +#else + rlwimi 6,8,24,(32-24),31 +#endif + b L(wdu3_loop32x) + .align 4 +L(wdu3_loop32): + /* copy 32 bytes at a time */ + lwz 8,1(4) + lwz 7,4(4) + stw 10,-8(3) + stw 11,-4(3) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,8,32 +#else + rlwimi 6,8,24,(32-24),31 +#endif +L(wdu3_loop32x): + lwz 10,8(4) + lwz 11,12(4) + stw 6,0(3) + stw 7,4(3) + lwz 6,16(4) + lwz 7,20(4) + stw 10,8(3) + stw 11,12(3) + lwz 10,24(4) + lwz 11,28(4) + lwz 8,32-3(4) + addi 4,4,32 + stw 6,16(3) + stw 7,20(3) + addi 3,3,32 +#ifdef __LITTLE_ENDIAN__ + srwi 6,8,24 +#else + slwi 6,8,24 +#endif + bdnz+ L(wdu3_loop32) + stw 10,-8(3) + stw 11,-4(3) + +L(wdu3_32tail): + mtcrf 0x01,31 + cmplwi cr5,31,16 + blt cr6,L(wdu_4tail) + /* calculate and store the final word */ + lwz 8,1(4) +#ifdef __LITTLE_ENDIAN__ + rldimi 6,8,8,32 +#else + rlwimi 6,8,24,(32-24),31 +#endif + b L(wdu_32tailx) + .align 4 +L(wdu_32tailx): + blt cr5,L(wdu_t32_8) + lwz 7,4(4) + addi 12,4,16 /* generate alternate pointers to avoid agen */ + addi 11,3,16 /* timing issues downstream. 
*/ + stw 6,0(3) + stw 7,4(3) + subi 31,31,16 + lwz 6,8(4) + lwz 7,12(4) + addi 4,4,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 + bf 28,L(wdu_t32_4x) + lwz 6,0(12) + lwz 7,4(12) + addi 4,4,8 + subi 31,31,8 + stw 6,0(11) + stw 7,4(11) + addi 3,3,8 + bf 29,L(wdu_t32_0) + lwz 6,8(12) + addi 4,4,4 + subi 31,31,4 + stw 6,8(11) + addi 3,3,4 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_4x): + bf 29,L(wdu_t32_0) + lwz 6,0(4) + addi 4,4,4 + subi 31,31,4 + stw 6,0(3) + addi 3,3,4 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_8): + bf 28,L(wdu_t32_4) + lwz 7,4(4) + subi 31,31,8 + bf 29,L(wdu_t32_8x) + stw 6,0(3) + stw 7,4(3) + lwz 6,8(4) + subi 31,31,4 + addi 4,4,12 + stw 6,8(3) + addi 3,3,12 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_8x): + addi 4,4,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 + b L(wdu_t32_0) + .align 4 +L(wdu_t32_4): + subi 31,31,4 + stw 6,0(3) + addi 4,4,4 + addi 3,3,4 + .align 4 +L(wdu_t32_0): +L(wdu_4tail): + cmplwi cr6,31,0 + beq cr6,L(wdus_0) /* If the tail is 0 bytes we are done! */ + bf 30,L(wdus_3) + lhz 7,0(4) + sth 7,0(3) + bf 31,L(wdus_0) + lbz 8,2(4) + stb 8,2(3) + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + .align 4 +L(wdus_3): + bf 31,L(wus_0) + lbz 6,0(4) + stb 6,0(3) + .align 4 +L(wdus_0): + /* Return original dst pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr +END (memcpy) + +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memset.S new file mode 100644 index 0000000000..f221c32d2f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/memset.S @@ -0,0 +1,539 @@ +/* Optimized 32-bit memset implementation for POWER6. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (1024 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + + .machine power6 +EALIGN (memset, 7, 0) + CALL_MCOUNT + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rMEMP3 r9 /* Alt mem pointer. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmplwi cr1, rLEN, 4 + andi. rALIGN, rMEMP0, 3 + mr rMEMP, rMEMP0 + ble- cr1, L(small) +/* Align to word boundary. */ + cmplwi cr5, rLEN, 31 + insrwi rCHR, rCHR, 8, 16 /* Replicate byte to halfword. 
*/ + beq+ L(aligned) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 4 + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + + .align 4 +/* Handle the case of size < 31. */ +L(aligned): + mtcrf 0x01, rLEN + insrwi rCHR, rCHR, 16, 0 /* Replicate halfword to word. */ + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x1C + subfic rALIGN, rALIGN, 0x20 + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stw rCHR, -4(rMEMP2) + stwu rCHR, -8(rMEMP2) + nop +L(a1): blt cr1, L(a2) + stw rCHR, -4(rMEMP2) + stw rCHR, -8(rMEMP2) + stw rCHR, -12(rMEMP2) + stwu rCHR, -16(rMEMP2) +L(a2): bf 29, L(caligned) + stw rCHR, -4(rMEMP2) + + .align 3 +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmplwi cr1, rCHR, 0 + clrrwi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + beq L(medium) /* We may not actually get to do a full line. */ + nop +/* Storing a non-zero "c" value. We are aligned at a sector (32-byte) + boundary may not be at cache line (128-byte) boundary. */ +L(nzloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmplwi cr1,rLEN,128 + + andi. rTMP,rMEMP,127 + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + stw rCHR,-8(rMEMP3) + stw rCHR,-4(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + addi rMEMP,rMEMP,32 + stw rCHR,8(rMEMP3) + stw rCHR,12(rMEMP3) + andi. 
rTMP,rMEMP,127 + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only two + stores every other cycle. */ + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,128 + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) + blt cr1,L(cacheAligned1) + b L(nzCacheAligned) + +/* Now we are aligned to the cache line and can use dcbtst. */ + .align 5 +L(nzCacheAligned): + cmplwi cr1,rLEN,128 + cmplwi cr6,rLEN,256 + blt cr1,L(cacheAligned1) + blt cr6,L(nzCacheAligned128) + .align 4 +L(nzCacheAligned128): + nop + addi rMEMP3,rMEMP,64 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + stw rCHR,24(rMEMP) + stw rCHR,28(rMEMP) + stw rCHR,32(rMEMP) + stw rCHR,36(rMEMP) + stw rCHR,40(rMEMP) + stw rCHR,44(rMEMP) + stw rCHR,48(rMEMP) + stw rCHR,52(rMEMP) + stw rCHR,56(rMEMP) + stw rCHR,60(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only one + store per cycle. 
*/ + stw rCHR,0(rMEMP3) + ori r1,r1,0 + stw rCHR,4(rMEMP3) + ori r1,r1,0 + stw rCHR,8(rMEMP3) + ori r1,r1,0 + stw rCHR,12(rMEMP3) + ori r1,r1,0 + stw rCHR,16(rMEMP3) + ori r1,r1,0 + stw rCHR,20(rMEMP3) + ori r1,r1,0 + stw rCHR,24(rMEMP3) + ori r1,r1,0 + stw rCHR,28(rMEMP3) + ori r1,r1,0 + stw rCHR,32(rMEMP3) + ori r1,r1,0 + stw rCHR,36(rMEMP3) + ori r1,r1,0 + stw rCHR,40(rMEMP3) + ori r1,r1,0 + stw rCHR,44(rMEMP3) + ori r1,r1,0 + stw rCHR,48(rMEMP3) + ori r1,r1,0 + stw rCHR,52(rMEMP3) + ori r1,r1,0 + stw rCHR,56(rMEMP3) + ori r1,r1,0 + stw rCHR,60(rMEMP3) + blt cr6,L(cacheAligned1) +#if IS_IN (libc) + lfd 0,-128(rMEMP) +#endif + b L(nzCacheAligned256) + .align 5 +L(nzCacheAligned256): + cmplwi cr1,rLEN,256 + addi rMEMP3,rMEMP,64 +#if !IS_IN (libc) +/* When we are not in libc we should use only GPRs to avoid the FPU lock + interrupt. */ + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + stw rCHR,24(rMEMP) + stw rCHR,28(rMEMP) + stw rCHR,32(rMEMP) + stw rCHR,36(rMEMP) + stw rCHR,40(rMEMP) + stw rCHR,44(rMEMP) + stw rCHR,48(rMEMP) + stw rCHR,52(rMEMP) + stw rCHR,56(rMEMP) + stw rCHR,60(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + stw rCHR,8(rMEMP3) + stw rCHR,12(rMEMP3) + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) +#else +/* We are in libc and this is a long memset so we can use FPRs and can afford + occasional FPU locked interrupts. 
*/ + stfd 0,0(rMEMP) + stfd 0,8(rMEMP) + stfd 0,16(rMEMP) + stfd 0,24(rMEMP) + stfd 0,32(rMEMP) + stfd 0,40(rMEMP) + stfd 0,48(rMEMP) + stfd 0,56(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 + stfd 0,0(rMEMP3) + stfd 0,8(rMEMP3) + stfd 0,16(rMEMP3) + stfd 0,24(rMEMP3) + stfd 0,32(rMEMP3) + stfd 0,40(rMEMP3) + stfd 0,48(rMEMP3) + stfd 0,56(rMEMP3) +#endif + bge cr1,L(nzCacheAligned256) + dcbtst 0,rMEMP + b L(cacheAligned1) + + .align 4 +/* Storing a zero "c" value. We are aligned at a sector (32-byte) + boundary but may not be at cache line (128-byte) boundary. If the + remaining length spans a full cache line we can use the Data cache + block zero instruction. */ +L(zloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmplwi cr1,rLEN,128 + beq L(medium) +L(getCacheAligned): + andi. rTMP,rMEMP,127 + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(cacheAligned) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + stw rCHR,-8(rMEMP3) + stw rCHR,-4(rMEMP3) +L(getCacheAligned2): + beq L(cacheAligned) + addi rLEN,rLEN,-32 + addi rMEMP,rMEMP,32 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + stw rCHR,8(rMEMP3) + stw rCHR,12(rMEMP3) + andi. rTMP,rMEMP,127 + nop + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) +L(getCacheAligned3): + beq L(cacheAligned) +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only two + stores every other cycle. 
*/ + addi rLEN,rLEN,-32 + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,128 + ori r1,r1,0 + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + cmplwi cr6,rLEN,256 + li rMEMP2,128 + ori r1,r1,0 + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAlignedx) + +/* Now we are aligned to the cache line and can use dcbz. */ + .align 4 +L(cacheAligned): + cmplwi cr1,rLEN,128 + cmplwi cr6,rLEN,256 + blt cr1,L(cacheAligned1) + li rMEMP2,128 +L(cacheAlignedx): + cmplwi cr5,rLEN,640 + blt cr6,L(cacheAligned128) + bgt cr5,L(cacheAligned512) + cmplwi cr6,rLEN,512 + dcbz 0,rMEMP + cmplwi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAligned256) + .align 5 +/* A simple loop for the longer (>640 bytes) lengths. This form limits + the branch miss-predicted to exactly 1 at loop exit.*/ +L(cacheAligned512): + cmplwi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + dcbz 0,rMEMP + addi rLEN,rLEN,-128 + addi rMEMP,rMEMP,128 + b L(cacheAligned512) + .align 5 +L(cacheAligned256): + cmplwi cr6,rLEN,512 + dcbz 0,rMEMP + cmplwi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + bge cr6,L(cacheAligned256) + blt cr1,L(cacheAligned1) + .align 4 +L(cacheAligned128): + dcbz 0,rMEMP + addi rMEMP,rMEMP,128 + addi rLEN,rLEN,-128 + .align 4 +L(cacheAligned1): + cmplwi cr1,rLEN,32 + blt cr1,L(handletail32) + addi rMEMP3,rMEMP,32 + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP) + stw rCHR,4(rMEMP) + stw rCHR,8(rMEMP) + stw rCHR,12(rMEMP) + stw rCHR,16(rMEMP) + stw rCHR,20(rMEMP) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,32 + stw rCHR,-8(rMEMP3) + stw rCHR,-4(rMEMP3) +L(cacheAligned2): + blt cr1,L(handletail32) + addi rLEN,rLEN,-32 + stw rCHR,0(rMEMP3) + stw rCHR,4(rMEMP3) + stw rCHR,8(rMEMP3) + stw 
rCHR,12(rMEMP3) + addi rMEMP,rMEMP,32 + cmplwi cr1,rLEN,32 + stw rCHR,16(rMEMP3) + stw rCHR,20(rMEMP3) + stw rCHR,24(rMEMP3) + stw rCHR,28(rMEMP3) + nop +L(cacheAligned3): + blt cr1,L(handletail32) +/* At this point we can overrun the store queue (pipe reject) so it is + time to slow things down. The store queue can merge two adjacent + stores into a single L1/L2 op, but the L2 is clocked at 1/2 the CPU. + So we add "group ending nops" to guarantee that we dispatch only two + stores every other cycle. */ + ori r1,r1,0 + ori r1,r1,0 + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,32(rMEMP3) + stw rCHR,36(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,40(rMEMP3) + stw rCHR,44(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,48(rMEMP3) + stw rCHR,52(rMEMP3) + ori r1,r1,0 + ori r1,r1,0 + stw rCHR,56(rMEMP3) + stw rCHR,60(rMEMP3) + +/* We are here because the length or remainder (rLEN) is less than the + cache line/sector size and does not justify aggressive loop unrolling. + So set up the preconditions for L(medium) and go there. */ + .align 3 +L(handletail32): + cmplwi cr1,rLEN,0 + beqlr cr1 + b L(medium) + + .align 4 +L(small): +/* Memset of 4 bytes or less. */ + cmplwi cr5, rLEN, 1 + cmplwi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. 
*/ + .align 5 +L(medium): + cmplwi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt 29, L(medium_29t) +L(medium_29f): + bge cr1, L(medium_27t) + bflr 28 + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt cr1, L(medium_27f) +L(medium_27t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + stw rCHR, -12(rMEMP) + stwu rCHR, -16(rMEMP) +L(medium_27f): + bflr 28 +L(medium_28t): + stw rCHR, -4(rMEMP) + stw rCHR, -8(rMEMP) + blr +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/multiarch/Implies new file mode 100644 index 0000000000..ff9f999749 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power5+/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/Implies new file mode 100644 index 0000000000..c0e1bea435 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power6/fpu +powerpc/powerpc32/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/Implies new file mode 100644 index 0000000000..d53ce2573c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power6/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/multiarch/Implies new file mode 100644 index 0000000000..c66805ee63 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/multiarch/Implies @@ -0,0 +1 @@ 
+powerpc/powerpc32/power5+/fpu/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S
new file mode 100644
index 0000000000..cb780516b5
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lrint.S
@@ -0,0 +1,41 @@
+/* Round double to long int.  POWER6x PowerPC32 version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+	.machine "power6"
+/* long int[r3] __lrint (double x[fp1]) */
+ENTRY (__lrint)
+	fctiw	fp13,fp1	/* Convert to integer word, current rounding mode.  */
+	mftgpr	r3,fp13	/* POWER6x: move result FPR->GPR without a stack round trip.  */
+	blr
+	END (__lrint)
+
+weak_alias (__lrint, lrint)
+
+strong_alias (__lrint, __lrintf)
+weak_alias (__lrint, lrintf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__lrint, __lrintl)
+weak_alias (__lrint, lrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lrint, lrintl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S
new file mode 100644
index 0000000000..05b13cd34c
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/fpu/s_lround.S
@@ -0,0 +1,51 @@
+/* lround function.  POWER6x, PowerPC32 version.
+   Copyright (C) 2006-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* long [r3] lround (float x [fp1])
+   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+   integer value, rounding halfway cases away from zero, regardless of
+   the current rounding mode."  However PowerPC Architecture defines
+   "round to Nearest" as "Choose the best approximation. In case of a
+   tie, choose the one that is even (least significant bit 0).".
+   So we pre-round using the V2.02 Floating Round to Integer Nearest
+   instruction before we use the Floating Convert to Integer Word with
+   round to zero instruction.  */
+
+	.machine	"power6"
+ENTRY (__lround)
+	frin	fp2,fp1	/* Pre-round +-0.5.  */
+	fctiwz	fp3,fp2	/* Convert To Integer Word lround toward 0.  */
+	mftgpr	r3,fp3	/* Transfer fpr3 to r3.  */
+	blr
+	END (__lround)
+
+weak_alias (__lround, lround)
+
+strong_alias (__lround, __lroundf)
+weak_alias (__lround, lroundf)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/multiarch/Implies
new file mode 100644
index 0000000000..ff9f999749
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power6x/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power5+/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Implies
new file mode 100644
index 0000000000..c0e1bea435
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/power6/fpu
+powerpc/powerpc32/power6
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Makefile
new file mode 100644
index 0000000000..5e8f4a28ba
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/Makefile
@@ -0,0 +1,4 @@
+ifeq ($(subdir),string)
+CFLAGS-strncase.c += -funroll-loops
+CFLAGS-strncase_l.c += -funroll-loops
+endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/multiarch/Implies
new file mode 100644
index 0000000000..45cbaede9f
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power6/fpu/multiarch
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
new file mode 100644
index 0000000000..da4efa0fb9
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
@@ -0,0 +1,93 @@
+/* finite().  PowerPC32/POWER7 version.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __finite(x) */
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	3
+.LC0:	/* 1.0 */
+	.quad	0x3ff0000000000000
+
+	.section	".text"
+	.type	__finite, @function
+	.machine	power7
+ENTRY (__finite)
+#ifdef SHARED
+	mflr	r11
+	cfi_register(lr,r11)
+
+	SETUP_GOT_ACCESS(r9,got_label)
+	addis	r9,r9,.LC0-got_label@ha
+	lfd	fp0,.LC0-got_label@l(r9)	/* Load the 1.0 constant via the GOT.  */
+
+	mtlr	r11
+	cfi_same_value (lr)
+#else
+	lis	r9,.LC0@ha
+	lfd	fp0,.LC0@l(r9)
+#endif
+	ftdiv	cr7,fp1,fp0	/* Non-destructive test of x against 1.0.  */
+	li	r3,1
+	bflr	30	/* Fast exit: x is an ordinary finite value.  */
+
+	/* We have -INF/+INF/NaN or a denormal.  */
+
+	stwu	r1,-16(r1)	/* Allocate stack space.  */
+	stfd	fp1,8(r1)	/* Transfer FP to GPR's.  */
+
+	ori	2,2,0	/* Force a new dispatch group.  */
+	lhz	r0,8+HISHORT(r1)	/* Fetch the upper 16 bits of the FP value
+				   (biased exponent and sign bit).  */
+	clrlwi	r0,r0,17	/* r0 = abs(r0).  */
+	addi	r1,r1,16	/* Reset the stack pointer.  */
+	cmpwi	cr7,r0,0x7ff0	/* r0 == 0x7ff0?  */
+	bltlr	cr7	/* LT means we have a denormal.  */
+	li	r3,0
+	blr
+	END (__finite)
+
+hidden_def (__finite)
+weak_alias (__finite, finite)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.  */
+strong_alias (__finite, __finitef)
+hidden_def (__finitef)
+weak_alias (__finitef, finitef)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__finite, __finitel)
+weak_alias (__finite, finitel)
+#endif
+
+#if IS_IN (libm)
+# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
+compat_symbol (libm, finite, finitel, GLIBC_2_0)
+# endif
+# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1)
+compat_symbol (libm, __finite, __finitel, GLIBC_2_1)
+# endif
+#else
+# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S
new file mode 100644
index 0000000000..54bd94176d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_finite.S.  */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
new file mode 100644
index 0000000000..668815761a
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
@@ -0,0 +1,85 @@
+/* isinf().  PowerPC32/POWER7 version.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isinf(x) */
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	3
+.LC0:	/* 1.0 */
+	.quad	0x3ff0000000000000
+
+	.section	".text"
+	.type	__isinf, @function
+	.machine	power7
+ENTRY (__isinf)
+#ifdef SHARED
+	mflr	r11
+	cfi_register(lr,r11)
+
+	SETUP_GOT_ACCESS(r9,got_label)
+	addis	r9,r9,.LC0-got_label@ha
+	lfd	fp0,.LC0-got_label@l(r9)	/* Load the 1.0 constant via the GOT.  */
+
+	mtlr	r11
+	cfi_same_value (lr)
+#else
+	lis	r9,.LC0@ha
+	lfd	fp0,.LC0@l(r9)
+#endif
+	ftdiv	cr7,fp1,fp0	/* Non-destructive test of x against 1.0.  */
+	li	r3,0
+	bflr	29	/* If not INF, return.  */
+
+	/* Either we have +INF or -INF.  */
+
+	stwu	r1,-16(r1)	/* Allocate stack space.  */
+	stfd	fp1,8(r1)	/* Transfer FP to GPR's.  */
+	ori	2,2,0	/* Force a new dispatch group.  */
+	lhz	r4,8+HISHORT(r1)	/* Fetch the upper 16 bits of the FP value
+				   (biased exponent and sign bit).  */
+	addi	r1,r1,16	/* Reset the stack pointer.  */
+	cmpwi	cr7,r4,0x7ff0	/* r4 == 0x7ff0?  */
+	li	r3,1
+	beqlr	cr7	/* EQ means INF, otherwise -INF.  */
+	li	r3,-1
+	blr
+	END (__isinf)
+
+hidden_def (__isinf)
+weak_alias (__isinf, isinf)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.  */
+strong_alias (__isinf, __isinff)
+hidden_def (__isinff)
+weak_alias (__isinff, isinff)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#if !IS_IN (libm)
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S
new file mode 100644
index 0000000000..be759e091e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isinf.S.  */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
new file mode 100644
index 0000000000..433137f1c4
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
@@ -0,0 +1,90 @@
+/* isnan().  PowerPC32/POWER7 version.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
 */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isnan(x) */
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align	3
+.LC0:	/* 1.0 */
+	.quad	0x3ff0000000000000
+
+	.section	".text"
+	.type	__isnan, @function
+	.machine	power7
+ENTRY (__isnan)
+#ifdef SHARED
+	mflr	r11
+	cfi_register(lr,r11)
+
+	SETUP_GOT_ACCESS(r9,got_label)
+	addis	r9,r9,.LC0-got_label@ha
+	lfd	fp0,.LC0-got_label@l(r9)	/* Load the 1.0 constant via the GOT.  */
+
+	mtlr	r11
+	cfi_same_value (lr)
+#else
+	lis	r9,.LC0@ha
+	lfd	fp0,.LC0@l(r9)
+#endif
+	ftdiv	cr7,fp1,fp0	/* Non-destructive test of x against 1.0.  */
+	li	r3,0
+	bflr	30	/* If not NaN or Inf, finish.  */
+
+	/* We have -INF/+INF/NaN or a denormal.  */
+
+	stwu	r1,-16(r1)	/* Allocate stack space.  */
+	stfd	fp1,8(r1)	/* Transfer FP to GPR's.  */
+	ori	2,2,0	/* Force a new dispatch group.  */
+	lwz	r4,8+HIWORD(r1)	/* Load the upper half of the FP value.  */
+	lwz	r5,8+LOWORD(r1)	/* Load the lower half of the FP value.  */
+	addi	r1,r1,16	/* Reset the stack pointer.  */
+	lis	r0,0x7ff0	/* Load the upper portion for an INF/NaN.  */
+	clrlwi	r4,r4,1	/* r4 = abs(r4).  */
+	cmpw	cr7,r4,r0	/* if (abs(r4) <= inf).  */
+	cmpwi	cr6,r5,0	/* r5 == 0x00000000?  */
+	bltlr	cr7	/* LT means we have a denormal.  */
+	bgt	cr7,L(NaN)	/* GT means we have a NaN.  */
+	beqlr	cr6	/* EQ means we have +/-INF.  */
+L(NaN):
+	li	r3,1	/* x == NaN?  */
+	blr
+	END (__isnan)
+
+hidden_def (__isnan)
+weak_alias (__isnan, isnan)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.  */
+strong_alias (__isnan, __isnanf)
+hidden_def (__isnanf)
+weak_alias (__isnanf, isnanf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isnan, __isnanl)
+weak_alias (__isnan, isnanl)
+#endif
+
+#if !IS_IN (libm)
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
+compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
new file mode 100644
index 0000000000..b48c85e0d3
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isnan.S.  */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memchr.S
new file mode 100644
index 0000000000..9ce8507a82
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memchr.S
@@ -0,0 +1,193 @@
+/* Optimized memchr implementation for PowerPC32/POWER7 using cmpb insn.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
 */
+
+#include <sysdep.h>
+
+/* char * [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */
+	.machine  power7
+ENTRY (__memchr)
+	CALL_MCOUNT
+	dcbt	0,r3
+	clrrwi	r8,r3,2	/* Word-align the search pointer.  */
+	insrwi	r4,r4,8,16	/* Replicate byte to word.  */
+
+	/* Calculate the last acceptable address and check for possible
+	   addition overflow by using satured math:
+	   r7 = r3 + r5
+	   r7 |= -(r7 < x)  */
+	add	r7,r3,r5
+	subfc	r6,r3,r7
+	subfe	r9,r9,r9
+	or	r7,r7,r9
+
+	insrwi	r4,r4,16,0	/* Now all four bytes of r4 hold BYTE.  */
+	cmplwi	r5,16
+	li	r9, -1
+	rlwinm	r6,r3,3,27,28	/* Calculate padding.  */
+	addi	r7,r7,-1
+#ifdef __LITTLE_ENDIAN__
+	slw	r9,r9,r6	/* Mask off bytes before s in the first word.  */
+#else
+	srw	r9,r9,r6	/* Mask off bytes before s in the first word.  */
+#endif
+	ble	L(small_range)
+
+	lwz	r12,0(r8)	/* Load word from memory.  */
+	cmpb	r3,r12,r4	/* Check for BYTEs in WORD1.  */
+	and	r3,r3,r9
+	clrlwi	r5,r7,30	/* Byte count - 1 in last word.  */
+	clrrwi	r7,r7,2	/* Address of last word.  */
+	cmplwi	cr7,r3,0	/* If r3 == 0, no BYTEs have been found.  */
+	bne	cr7,L(done)
+
+	mtcrf   0x01,r8
+	/* Are we now aligned to a doubleword boundary?  If so, skip to
+	   the main loop.  Otherwise, go through the alignment code.  */
+	bt	29,L(loop_setup)
+
+	/* Handle WORD2 of pair.  */
+	lwzu	r12,4(r8)
+	cmpb	r3,r12,r4
+	cmplwi	cr7,r3,0
+	bne	cr7,L(done)
+
+L(loop_setup):
+	/* The last word we want to read in the loop below is the one
+	   containing the last byte of the string, ie. the word at
+	   (s + size - 1) & ~3, or r7.  The first word read is at
+	   r8 + 4, we read 2 * cnt words, so the last word read will
+	   be at r8 + 4 + 8 * cnt - 4.  Solving for cnt gives
+	   cnt = (r7 - r8) / 8  */
+	sub	r6,r7,r8
+	srwi	r6,r6,3	/* Number of loop iterations.  */
+	mtctr	r6	/* Setup the counter.  */
+
+	/* Main loop to look for BYTE in the string.  Since
+	   it's a small loop (8 instructions), align it to 32-bytes.  */
+	.align	5
+L(loop):
+	/* Load two words, compare and merge in a
+	   single register for speed.  This is an attempt
+	   to speed up the byte-checking process for bigger strings.  */
+	lwz	r12,4(r8)
+	lwzu	r11,8(r8)
+	cmpb	r3,r12,r4
+	cmpb	r9,r11,r4
+	or	r6,r9,r3	/* Merge everything in one word.  */
+	cmplwi	cr7,r6,0
+	bne	cr7,L(found)
+	bdnz	L(loop)
+
+	/* We may have one more dword to read.  */
+	cmplw	r8,r7
+	beqlr
+
+	lwzu	r12,4(r8)
+	cmpb	r3,r12,r4
+	cmplwi	cr6,r3,0
+	bne	cr6,L(done)
+	blr
+
+	.align	4
+L(found):
+	/* OK, one (or both) of the words contains BYTE.  Check
+	   the first word and decrement the address in case the first
+	   word really contains BYTE.  */
+	cmplwi	cr6,r3,0
+	addi	r8,r8,-4
+	bne	cr6,L(done)
+
+	/* BYTE must be in the second word.  Adjust the address
+	   again and move the result of cmpb to r3 so we can calculate the
+	   pointer.  */
+
+	mr	r3,r9
+	addi	r8,r8,4
+
+	/* r3 has the output of the cmpb instruction, that is, it contains
+	   0xff in the same position as BYTE in the original
+	   word from the string.  Use that to calculate the pointer.
+	   We need to make sure BYTE is *before* the end of the range.  */
+L(done):
+#ifdef __LITTLE_ENDIAN__
+	addi	r0,r3,-1
+	andc	r0,r0,r3
+	popcntw	r0,r0	/* Count trailing zeros.  */
+#else
+	cntlzw	r0,r3	/* Count leading zeros before the match.  */
+#endif
+	cmplw	r8,r7	/* Are we on the last word?  */
+	srwi	r0,r0,3	/* Convert leading/trailing zeros to bytes.  */
+	add	r3,r8,r0	/* Form the pointer to the matching byte.  */
+	cmplw	cr7,r0,r5	/* If on the last dword, check byte offset.  */
+	bnelr
+	blelr	cr7
+	li	r3,0	/* Match was past the end of the range: NULL.  */
+	blr
+
+	.align	4
+L(null):
+	li	r3,0
+	blr
+
+/* Deals with size <= 16.  */
+	.align	4
+L(small_range):
+	cmplwi	r5,0
+	beq	L(null)
+	lwz	r12,0(r8)	/* Load word from memory.  */
+	cmpb	r3,r12,r4	/* Check for BYTE in WORD1.  */
+	and	r3,r3,r9
+	cmplwi	cr7,r3,0
+	clrlwi	r5,r7,30	/* Byte count - 1 in last word.  */
+	clrrwi	r7,r7,2	/* Address of last word.  */
+	cmplw	r8,r7	/* Are we done already?  */
+	bne	cr7,L(done)
+	beqlr
+
+	lwzu	r12,4(r8)
+	cmpb	r3,r12,r4
+	cmplwi	cr6,r3,0
+	cmplw	r8,r7
+	bne	cr6,L(done)
+	beqlr
+
+	lwzu	r12,4(r8)
+	cmpb	r3,r12,r4
+	cmplwi	cr6,r3,0
+	cmplw	r8,r7
+	bne	cr6,L(done)
+	beqlr
+
+	lwzu	r12,4(r8)
+	cmpb	r3,r12,r4
+	cmplwi	cr6,r3,0
+	cmplw	r8,r7
+	bne	cr6,L(done)
+	beqlr
+
+	lwzu	r12,4(r8)
+	cmpb	r3,r12,r4
+	cmplwi	cr6,r3,0
+	bne	cr6,L(done)
+	blr
+
+END (__memchr)
+weak_alias (__memchr, memchr)
+libc_hidden_builtin_def (memchr)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcmp.S
new file mode 100644
index 0000000000..09c9b9bf4d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcmp.S
@@ -0,0 +1,1375 @@
+/* Optimized memcmp implementation for POWER7/PowerPC32.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/

#include <sysdep.h>

/* int [r3] memcmp (const char *s1 [r3],
		    const char *s2 [r4],
		    size_t size [r5])  */

/* Strategy overview:
   - Lengths < 8 bytes (or mismatched low bits of the two pointers that
     prevent word compares) go to the simple byte loop L(bytealigned).
   - When both strings share the same word alignment, words are compared
     four at a time in L(dLoop), using cr7/cr1/cr6/cr5 as a pipeline of
     pending compare results so loads run ahead of branches.
   - When the alignments differ, L(unaligned)/L(Wunaligned) align rSTR1
     and reconstruct rSTR2 words with srw/slw (rSHR/rSHL) merge pairs.
   - On little-endian, lwbrx byte-reverses loads so the word compares
     still order bytes most-significant-first, as memcmp requires.
   rWORD7/rWORD8 (r30/r31) are non-volatile and are only saved/restored
   on paths long enough to need them; early-exit paths stick to
   volatile registers.  */

	.machine power7
EALIGN (memcmp, 4, 0)
	CALL_MCOUNT

#define rRTN	r3
#define rSTR1	r3	/* first string arg */
#define rSTR2	r4	/* second string arg */
#define rN	r5	/* max string length */
#define rWORD1	r6	/* current word in s1 */
#define rWORD2	r7	/* current word in s2 */
#define rWORD3	r8	/* next word in s1 */
#define rWORD4	r9	/* next word in s2 */
#define rWORD5	r10	/* next word in s1 */
#define rWORD6	r11	/* next word in s2 */
#define rWORD7	r30	/* next word in s1 */
#define rWORD8	r31	/* next word in s2 */

	xor	r0, rSTR2, rSTR1
	cmplwi	cr6, rN, 0
	cmplwi	cr1, rN, 12
	clrlwi.	r0, r0, 30
	clrlwi	r12, rSTR1, 30
	cmplwi	cr5, r12, 0
	beq-	cr6, L(zeroLength)
	dcbt	0, rSTR1
	dcbt	0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
   byte loop.  */
	blt	cr1, L(bytealigned)
	stwu	1, -64(r1)
	cfi_adjust_cfa_offset(64)
	stw	rWORD8, 48(r1)
	stw	rWORD7, 44(r1)
	cfi_offset(rWORD8, (48-64))
	cfi_offset(rWORD7, (44-64))
	bne	L(unaligned)
/* At this point we know both strings have the same alignment and the
   compare length is at least 8 bytes.  r12 contains the low order
   2 bits of rSTR1 and cr5 contains the result of the logical compare
   of r12 to 0.  If r12 == 0 then we are already word
   aligned and can perform the word aligned loop.

   Otherwise we know the two strings have the same alignment (but not
   yet word aligned).  So we force the string addresses to the next lower
   word boundary and special case this first word using shift left to
   eliminate bits preceding the first byte.  Since we want to join the
   normal (word aligned) compare loop, starting at the second word,
   we need to adjust the length (rN) and special case the loop
   versioning for the first word.  This ensures that the loop count is
   correct and the first word (shifted) is in the expected register pair.  */
	.align	4
L(samealignment):
	clrrwi	rSTR1, rSTR1, 2
	clrrwi	rSTR2, rSTR2, 2
	beq	cr5, L(Waligned)
	add	rN, rN, r12
	slwi	rWORD6, r12, 3
	srwi	r0, rN, 4	/* Divide by 16 */
	andi.	r12, rN, 12	/* Get the word remainder */
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 0(rSTR1)
	lwz	rWORD2, 0(rSTR2)
#endif
	cmplwi	cr1, r12, 8
	cmplwi	cr7, rN, 16
	clrlwi	rN, rN, 30
	beq	L(dPs4)
	mtctr	r0
	bgt	cr1, L(dPs3)
	beq	cr1, L(dPs2)

/* Remainder is 4 */
	.align	3
L(dsP1):
	slw	rWORD5, rWORD1, rWORD6
	slw	rWORD6, rWORD2, rWORD6
	cmplw	cr5, rWORD5, rWORD6
	blt	cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway.  */
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 4(rSTR1)
	lwz	rWORD2, 4(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
	b	L(dP1e)
/* Remainder is 8 */
	.align	4
L(dPs2):
	slw	rWORD5, rWORD1, rWORD6
	slw	rWORD6, rWORD2, rWORD6
	cmplw	cr6, rWORD5, rWORD6
	blt	cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway.  */
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD7, 4(rSTR1)
	lwz	rWORD8, 4(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
	b	L(dP2e)
/* Remainder is 12 */
	.align	4
L(dPs3):
	slw	rWORD3, rWORD1, rWORD6
	slw	rWORD4, rWORD2, rWORD6
	cmplw	cr1, rWORD3, rWORD4
	b	L(dP3e)
/* Count is a multiple of 16, remainder is 0 */
	.align	4
L(dPs4):
	mtctr	r0
	slw	rWORD1, rWORD1, rWORD6
	slw	rWORD2, rWORD2, rWORD6
	cmplw	cr7, rWORD1, rWORD2
	b	L(dP4e)

/* At this point we know both strings are word aligned and the
   compare length is at least 8 bytes.  */
	.align	4
L(Waligned):
	andi.	r12, rN, 12	/* Get the word remainder */
	srwi	r0, rN, 4	/* Divide by 16 */
	cmplwi	cr1, r12, 8
	cmplwi	cr7, rN, 16
	clrlwi	rN, rN, 30
	beq	L(dP4)
	bgt	cr1, L(dP3)
	beq	cr1, L(dP2)

/* Remainder is 4 */
	.align	4
L(dP1):
	mtctr	r0
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
   (8-15 byte compare), we want to use only volatile registers.  This
   means we can avoid restoring non-volatile registers since we did not
   change any on the early exit path.  The key here is the non-early
   exit path only cares about the condition code (cr5), not about which
   register pair was used.  */
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 0(rSTR1)
	lwz	rWORD6, 0(rSTR2)
#endif
	cmplw	cr5, rWORD5, rWORD6
	blt	cr7, L(dP1x)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 4(rSTR1)
	lwz	rWORD2, 4(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
L(dP1e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 8(rSTR1)
	lwz	rWORD4, 8(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 12(rSTR1)
	lwz	rWORD6, 12(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	bne	cr5, L(dLcr5x)
	bne	cr7, L(dLcr7x)

#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwzu	rWORD7, 16(rSTR1)
	lwzu	rWORD8, 16(rSTR2)
#endif
	bne	cr1, L(dLcr1)
	cmplw	cr5, rWORD7, rWORD8
	bdnz	L(dLoop)
	bne	cr6, L(dLcr6)
	lwz	rWORD7, 44(r1)
	lwz	rWORD8, 48(r1)
	.align	3
L(dP1x):
	slwi.	r12, rN, 3
	bne	cr5, L(dLcr5x)
	subfic	rN, r12, 32	/* Shift count is 32 - (rN * 8).  */
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	bne	L(d00)
	li	rRTN, 0
	blr

/* Remainder is 8 */
	.align	4
	cfi_adjust_cfa_offset(64)
L(dP2):
	mtctr	r0
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 0(rSTR1)
	lwz	rWORD6, 0(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	blt	cr7, L(dP2x)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD7, 4(rSTR1)
	lwz	rWORD8, 4(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
L(dP2e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 8(rSTR1)
	lwz	rWORD2, 8(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 12(rSTR1)
	lwz	rWORD4, 12(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#endif
	bne	cr6, L(dLcr6)
	bne	cr5, L(dLcr5)
	b	L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
   only use volatile registers and avoid restoring non-volatile
   registers.  */
	.align	4
L(dP2x):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 4(rSTR1)
	lwz	rWORD4, 4(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
	slwi.	r12, rN, 3
	bne	cr6, L(dLcr6x)
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#endif
	bne	cr1, L(dLcr1x)
	subfic	rN, r12, 32	/* Shift count is 32 - (rN * 8).  */
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	bne	L(d00)
	li	rRTN, 0
	blr

/* Remainder is 12 */
	.align	4
	cfi_adjust_cfa_offset(64)
L(dP3):
	mtctr	r0
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 0(rSTR1)
	lwz	rWORD4, 0(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
L(dP3e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 4(rSTR1)
	lwz	rWORD6, 4(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	blt	cr7, L(dP3x)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD7, 8(rSTR1)
	lwz	rWORD8, 8(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 12(rSTR1)
	lwz	rWORD2, 12(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 8
	addi	rSTR2, rSTR2, 8
#endif
	bne	cr1, L(dLcr1)
	bne	cr6, L(dLcr6)
	b	L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
   only use volatile registers and avoid restoring non-volatile
   registers.  */
	.align	4
L(dP3x):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 8(rSTR1)
	lwz	rWORD2, 8(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
	slwi.	r12, rN, 3
	bne	cr1, L(dLcr1x)
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 8
	addi	rSTR2, rSTR2, 8
#endif
	bne	cr6, L(dLcr6x)
	subfic	rN, r12, 32	/* Shift count is 32 - (rN * 8).  */
	bne	cr7, L(dLcr7x)
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	bne	L(d00)
	li	rRTN, 0
	blr

/* Count is a multiple of 16, remainder is 0 */
	.align	4
	cfi_adjust_cfa_offset(64)
L(dP4):
	mtctr	r0
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 0(rSTR1)
	lwz	rWORD2, 0(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
L(dP4e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 4(rSTR1)
	lwz	rWORD4, 4(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 8(rSTR1)
	lwz	rWORD6, 8(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwzu	rWORD7, 12(rSTR1)
	lwzu	rWORD8, 12(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
	bne	cr7, L(dLcr7)
	bne	cr1, L(dLcr1)
	bdz-	L(d24)		/* Adjust CTR as we start with +4 */
/* This is the primary loop */
	.align	4
L(dLoop):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 4(rSTR1)
	lwz	rWORD2, 4(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
	bne	cr6, L(dLcr6)
L(dLoop1):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 8(rSTR1)
	lwz	rWORD4, 8(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	bne	cr5, L(dLcr5)
L(dLoop2):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 12(rSTR1)
	lwz	rWORD6, 12(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
	bne	cr7, L(dLcr7)
L(dLoop3):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwzu	rWORD7, 16(rSTR1)
	lwzu	rWORD8, 16(rSTR2)
#endif
	bne	cr1, L(dLcr1)
	cmplw	cr7, rWORD1, rWORD2
	bdnz	L(dLoop)

/* Loop epilogue: drain the four compare results still in flight.  */
L(dL4):
	cmplw	cr1, rWORD3, rWORD4
	bne	cr6, L(dLcr6)
	cmplw	cr6, rWORD5, rWORD6
	bne	cr5, L(dLcr5)
	cmplw	cr5, rWORD7, rWORD8
L(d44):
	bne	cr7, L(dLcr7)
L(d34):
	bne	cr1, L(dLcr1)
L(d24):
	bne	cr6, L(dLcr6)
L(d14):
	slwi.	r12, rN, 3
	bne	cr5, L(dLcr5)
L(d04):
	lwz	rWORD7, 44(r1)
	lwz	rWORD8, 48(r1)
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	subfic	rN, r12, 32	/* Shift count is 32 - (rN * 8).  */
	beq	L(zeroLength)
/* At this point we have a remainder of 1 to 3 bytes to compare.  Since
   we are aligned it is safe to load the whole word, and use
   shift right to eliminate bits beyond the compare length.  */
L(d00):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 4(rSTR1)
	lwz	rWORD2, 4(rSTR2)
#endif
	srw	rWORD1, rWORD1, rN
	srw	rWORD2, rWORD2, rN
	sub	rRTN, rWORD1, rWORD2
	blr

/* Difference found: restore non-volatiles (when saved), pop the frame,
   and return +1/-1 based on the recorded condition register.  The x
   entry points are for early-exit paths that never saved r30/r31.  */
	.align	4
	cfi_adjust_cfa_offset(64)
L(dLcr7):
	lwz	rWORD7, 44(r1)
	lwz	rWORD8, 48(r1)
L(dLcr7x):
	li	rRTN, 1
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	bgtlr	cr7
	li	rRTN, -1
	blr
	.align	4
	cfi_adjust_cfa_offset(64)
L(dLcr1):
	lwz	rWORD7, 44(r1)
	lwz	rWORD8, 48(r1)
L(dLcr1x):
	li	rRTN, 1
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	bgtlr	cr1
	li	rRTN, -1
	blr
	.align	4
	cfi_adjust_cfa_offset(64)
L(dLcr6):
	lwz	rWORD7, 44(r1)
	lwz	rWORD8, 48(r1)
L(dLcr6x):
	li	rRTN, 1
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	bgtlr	cr6
	li	rRTN, -1
	blr
	.align	4
	cfi_adjust_cfa_offset(64)
L(dLcr5):
	lwz	rWORD7, 44(r1)
	lwz	rWORD8, 48(r1)
L(dLcr5x):
	li	rRTN, 1
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	bgtlr	cr5
	li	rRTN, -1
	blr

	.align	4
L(bytealigned):
	mtctr	rN

/* We need to prime this loop.  This loop is swing modulo scheduled
   to avoid pipe delays.  The dependent instruction latencies (load to
   compare to conditional branch) is 2 to 3 cycles.  In this loop each
   dispatch group ends in a branch and takes 1 cycle.  Effectively
   the first iteration of the loop only serves to load operands and
   branches based on compares are delayed until the next loop.

   So we must precondition some registers and condition codes so that
   we don't exit the loop early on the first iteration.  */

	lbz	rWORD1, 0(rSTR1)
	lbz	rWORD2, 0(rSTR2)
	bdz	L(b11)
	cmplw	cr7, rWORD1, rWORD2
	lbz	rWORD3, 1(rSTR1)
	lbz	rWORD4, 1(rSTR2)
	bdz	L(b12)
	cmplw	cr1, rWORD3, rWORD4
	lbzu	rWORD5, 2(rSTR1)
	lbzu	rWORD6, 2(rSTR2)
	bdz	L(b13)
	.align	4
L(bLoop):
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	bne	cr7, L(bLcr7)

	cmplw	cr6, rWORD5, rWORD6
	bdz	L(b3i)

	lbzu	rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	bne	cr1, L(bLcr1)

	cmplw	cr7, rWORD1, rWORD2
	bdz	L(b2i)

	lbzu	rWORD5, 1(rSTR1)
	lbzu	rWORD6, 1(rSTR2)
	bne	cr6, L(bLcr6)

	cmplw	cr1, rWORD3, rWORD4
	bdnz	L(bLoop)

/* We speculatively load bytes before we have tested the previous
   bytes.  But we must avoid overrunning the length (in the ctr) to
   prevent these speculative loads from causing a segfault.  In this
   case the loop will exit early (before all pending bytes are
   tested).  In this case we must complete the pending operations
   before returning.  */
L(b1i):
	bne	cr7, L(bLcr7)
	bne	cr1, L(bLcr1)
	b	L(bx56)
	.align	4
L(b2i):
	bne	cr6, L(bLcr6)
	bne	cr7, L(bLcr7)
	b	L(bx34)
	.align	4
L(b3i):
	bne	cr1, L(bLcr1)
	bne	cr6, L(bLcr6)
	b	L(bx12)
	.align	4
L(bLcr7):
	li	rRTN, 1
	bgtlr	cr7
	li	rRTN, -1
	blr
L(bLcr1):
	li	rRTN, 1
	bgtlr	cr1
	li	rRTN, -1
	blr
L(bLcr6):
	li	rRTN, 1
	bgtlr	cr6
	li	rRTN, -1
	blr

L(b13):
	bne	cr7, L(bx12)
	bne	cr1, L(bx34)
L(bx56):
	sub	rRTN, rWORD5, rWORD6
	blr
	nop
L(b12):
	bne	cr7, L(bx12)
L(bx34):
	sub	rRTN, rWORD3, rWORD4
	blr
L(b11):
L(bx12):
	sub	rRTN, rWORD1, rWORD2
	blr
	.align	4
L(zeroLength):
	li	rRTN, 0
	blr

	.align	4
/* At this point we know the strings have different alignment and the
   compare length is at least 8 bytes.  r12 contains the low order
   2 bits of rSTR1 and cr5 contains the result of the logical compare
   of r12 to 0.  If r12 == 0 then rStr1 is word aligned and can
   perform the Wunaligned loop.

   Otherwise we know that rSTR1 is not already word aligned yet.
   So we can force the string addresses to the next lower word
   boundary and special case this first word using shift left to
   eliminate bits preceding the first byte.  Since we want to join the
   normal (Wunaligned) compare loop, starting at the second word,
   we need to adjust the length (rN) and special case the loop
   versioning for the first W.  This ensures that the loop count is
   correct and the first W (shifted) is in the expected register pair.  */
#define rSHL		r29	/* Unaligned shift left count.  */
#define rSHR		r28	/* Unaligned shift right count.  */
#define rWORD8_SHIFT	r27	/* Left rotation temp for rWORD2.  */
#define rWORD2_SHIFT	r26	/* Left rotation temp for rWORD4.  */
#define rWORD4_SHIFT	r25	/* Left rotation temp for rWORD6.  */
#define rWORD6_SHIFT	r24	/* Left rotation temp for rWORD8.  */
	cfi_adjust_cfa_offset(64)
L(unaligned):
	stw	rSHL, 40(r1)
	cfi_offset(rSHL, (40-64))
	clrlwi	rSHL, rSTR2, 30
	stw	rSHR, 36(r1)
	cfi_offset(rSHR, (36-64))
	beq	cr5, L(Wunaligned)
	stw	rWORD8_SHIFT, 32(r1)
	cfi_offset(rWORD8_SHIFT, (32-64))
/* Adjust the logical start of rSTR2 to compensate for the extra bits
   in the 1st rSTR1 W.  */
	sub	rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the W before the W that contains
   the actual start of rSTR2.  */
	clrrwi	rSTR2, rSTR2, 2
	stw	rWORD2_SHIFT, 28(r1)
/* Compute the left/right shift counts for the unaligned rSTR2,
   compensating for the logical (W aligned) start of rSTR1.  */
	clrlwi	rSHL, rWORD8_SHIFT, 30
	clrrwi	rSTR1, rSTR1, 2
	stw	rWORD4_SHIFT, 24(r1)
	slwi	rSHL, rSHL, 3
	cmplw	cr5, rWORD8_SHIFT, rSTR2
	add	rN, rN, r12
	slwi	rWORD6, r12, 3
	stw	rWORD6_SHIFT, 20(r1)
	cfi_offset(rWORD2_SHIFT, (28-64))
	cfi_offset(rWORD4_SHIFT, (24-64))
	cfi_offset(rWORD6_SHIFT, (20-64))
	subfic	rSHR, rSHL, 32
	srwi	r0, rN, 4	/* Divide by 16 */
	andi.	r12, rN, 12	/* Get the W remainder */
/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
   this special case those bits may be discarded anyway.  Also we
   must avoid loading a W where none of the bits are part of rSTR2 as
   this may cross a page boundary and cause a page fault.  */
	li	rWORD8, 0
	blt	cr5, L(dus0)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD8, 0(rSTR2)
	addi	rSTR2, rSTR2, 4
#endif
	slw	rWORD8, rWORD8, rSHL

L(dus0):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 0(rSTR1)
	lwz	rWORD2, 0(rSTR2)
#endif
	cmplwi	cr1, r12, 8
	cmplwi	cr7, rN, 16
	srw	r12, rWORD2, rSHR
	clrlwi	rN, rN, 30
	beq	L(duPs4)
	mtctr	r0
	or	rWORD8, r12, rWORD8
	bgt	cr1, L(duPs3)
	beq	cr1, L(duPs2)

/* Remainder is 4 */
	.align	4
L(dusP1):
	slw	rWORD8_SHIFT, rWORD2, rSHL
	slw	rWORD7, rWORD1, rWORD6
	slw	rWORD8, rWORD8, rWORD6
	bge	cr7, L(duP1e)
/* At this point we exit early with the first word compare
   complete and remainder of 0 to 3 bytes.  See L(du14) for details on
   how we handle the remaining bytes.  */
	cmplw	cr5, rWORD7, rWORD8
	slwi.	rN, rN, 3
	bne	cr5, L(duLcr5)
	cmplw	cr7, rN, rSHR
	beq	L(duZeroReturn)
	li	r0, 0
	ble	cr7, L(dutrim)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD2, 4(rSTR2)
#endif
	srw	r0, rWORD2, rSHR
	b	L(dutrim)
/* Remainder is 8 */
	.align	4
L(duPs2):
	slw	rWORD6_SHIFT, rWORD2, rSHL
	slw	rWORD5, rWORD1, rWORD6
	slw	rWORD6, rWORD8, rWORD6
	b	L(duP2e)
/* Remainder is 12 */
	.align	4
L(duPs3):
	slw	rWORD4_SHIFT, rWORD2, rSHL
	slw	rWORD3, rWORD1, rWORD6
	slw	rWORD4, rWORD8, rWORD6
	b	L(duP3e)
/* Count is a multiple of 16, remainder is 0 */
	.align	4
L(duPs4):
	mtctr	r0
	or	rWORD8, r12, rWORD8
	slw	rWORD2_SHIFT, rWORD2, rSHL
	slw	rWORD1, rWORD1, rWORD6
	slw	rWORD2, rWORD8, rWORD6
	b	L(duP4e)

/* At this point we know rSTR1 is word aligned and the
   compare length is at least 8 bytes.  */
	.align	4
L(Wunaligned):
	stw	rWORD8_SHIFT, 32(r1)
	clrrwi	rSTR2, rSTR2, 2
	stw	rWORD2_SHIFT, 28(r1)
	srwi	r0, rN, 4	/* Divide by 16 */
	stw	rWORD4_SHIFT, 24(r1)
	andi.	r12, rN, 12	/* Get the W remainder */
	stw	rWORD6_SHIFT, 20(r1)
	cfi_offset(rWORD8_SHIFT, (32-64))
	cfi_offset(rWORD2_SHIFT, (28-64))
	cfi_offset(rWORD4_SHIFT, (24-64))
	cfi_offset(rWORD6_SHIFT, (20-64))
	slwi	rSHL, rSHL, 3
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR2, rSTR2, 4
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD6, 0(rSTR2)
	lwzu	rWORD8, 4(rSTR2)
#endif
	cmplwi	cr1, r12, 8
	cmplwi	cr7, rN, 16
	clrlwi	rN, rN, 30
	subfic	rSHR, rSHL, 32
	slw	rWORD6_SHIFT, rWORD6, rSHL
	beq	L(duP4)
	mtctr	r0
	bgt	cr1, L(duP3)
	beq	cr1, L(duP2)

/* Remainder is 4 */
	.align	4
L(duP1):
	srw	r12, rWORD8, rSHR
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	addi	rSTR1, rSTR1, 4
#else
	lwz	rWORD7, 0(rSTR1)
#endif
	slw	rWORD8_SHIFT, rWORD8, rSHL
	or	rWORD8, r12, rWORD6_SHIFT
	blt	cr7, L(duP1x)
L(duP1e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 4(rSTR1)
	lwz	rWORD2, 4(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
	srw	r0, rWORD2, rSHR
	slw	rWORD2_SHIFT, rWORD2, rSHL
	or	rWORD2, r0, rWORD8_SHIFT
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 8(rSTR1)
	lwz	rWORD4, 8(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
	srw	r12, rWORD4, rSHR
	slw	rWORD4_SHIFT, rWORD4, rSHL
	bne	cr5, L(duLcr5)
	or	rWORD4, r12, rWORD2_SHIFT
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 12(rSTR1)
	lwz	rWORD6, 12(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
	srw	r0, rWORD6, rSHR
	slw	rWORD6_SHIFT, rWORD6, rSHL
	bne	cr7, L(duLcr7)
	or	rWORD6, r0, rWORD4_SHIFT
	cmplw	cr6, rWORD5, rWORD6
	b	L(duLoop3)
	.align	4
/* At this point we exit early with the first word compare
   complete and remainder of 0 to 3 bytes.  See L(du14) for details on
   how we handle the remaining bytes.  */
L(duP1x):
	cmplw	cr5, rWORD7, rWORD8
	slwi.	rN, rN, 3
	bne	cr5, L(duLcr5)
	cmplw	cr7, rN, rSHR
	beq	L(duZeroReturn)
	li	r0, 0
	ble	cr7, L(dutrim)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD2, 8(rSTR2)
#endif
	srw	r0, rWORD2, rSHR
	b	L(dutrim)
/* Remainder is 8 */
	.align	4
L(duP2):
	srw	r0, rWORD8, rSHR
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	addi	rSTR1, rSTR1, 4
#else
	lwz	rWORD5, 0(rSTR1)
#endif
	or	rWORD6, r0, rWORD6_SHIFT
	slw	rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD7, 4(rSTR1)
	lwz	rWORD8, 4(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	srw	r12, rWORD8, rSHR
	slw	rWORD8_SHIFT, rWORD8, rSHL
	or	rWORD8, r12, rWORD6_SHIFT
	blt	cr7, L(duP2x)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 8(rSTR1)
	lwz	rWORD2, 8(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
	bne	cr6, L(duLcr6)
	srw	r0, rWORD2, rSHR
	slw	rWORD2_SHIFT, rWORD2, rSHL
	or	rWORD2, r0, rWORD8_SHIFT
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 12(rSTR1)
	lwz	rWORD4, 12(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
	bne	cr5, L(duLcr5)
	srw	r12, rWORD4, rSHR
	slw	rWORD4_SHIFT, rWORD4, rSHL
	or	rWORD4, r12, rWORD2_SHIFT
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#endif
	cmplw	cr1, rWORD3, rWORD4
	b	L(duLoop2)
	.align	4
L(duP2x):
	cmplw	cr5, rWORD7, rWORD8
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#endif
	bne	cr6, L(duLcr6)
	slwi.	rN, rN, 3
	bne	cr5, L(duLcr5)
	cmplw	cr7, rN, rSHR
	beq	L(duZeroReturn)
	li	r0, 0
	ble	cr7, L(dutrim)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD2, 4(rSTR2)
#endif
	srw	r0, rWORD2, rSHR
	b	L(dutrim)

/* Remainder is 12 */
	.align	4
L(duP3):
	srw	r12, rWORD8, rSHR
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	addi	rSTR1, rSTR1, 4
#else
	lwz	rWORD3, 0(rSTR1)
#endif
	slw	rWORD4_SHIFT, rWORD8, rSHL
	or	rWORD4, r12, rWORD6_SHIFT
L(duP3e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 4(rSTR1)
	lwz	rWORD6, 4(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
	srw	r0, rWORD6, rSHR
	slw	rWORD6_SHIFT, rWORD6, rSHL
	or	rWORD6, r0, rWORD4_SHIFT
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD7, 8(rSTR1)
	lwz	rWORD8, 8(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	bne	cr1, L(duLcr1)
	srw	r12, rWORD8, rSHR
	slw	rWORD8_SHIFT, rWORD8, rSHL
	or	rWORD8, r12, rWORD6_SHIFT
	blt	cr7, L(duP3x)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 12(rSTR1)
	lwz	rWORD2, 12(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
	bne	cr6, L(duLcr6)
	srw	r0, rWORD2, rSHR
	slw	rWORD2_SHIFT, rWORD2, rSHL
	or	rWORD2, r0, rWORD8_SHIFT
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 8
	addi	rSTR2, rSTR2, 8
#endif
	cmplw	cr7, rWORD1, rWORD2
	b	L(duLoop1)
	.align	4
L(duP3x):
#ifndef __LITTLE_ENDIAN__
	addi	rSTR1, rSTR1, 8
	addi	rSTR2, rSTR2, 8
#endif
#if 0
/* Huh?  We've already branched on cr1!  */
	bne	cr1, L(duLcr1)
#endif
	cmplw	cr5, rWORD7, rWORD8
	bne	cr6, L(duLcr6)
	slwi.	rN, rN, 3
	bne	cr5, L(duLcr5)
	cmplw	cr7, rN, rSHR
	beq	L(duZeroReturn)
	li	r0, 0
	ble	cr7, L(dutrim)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD2, 4(rSTR2)
#endif
	srw	r0, rWORD2, rSHR
	b	L(dutrim)

/* Count is a multiple of 16, remainder is 0 */
	.align	4
L(duP4):
	mtctr	r0
	srw	r0, rWORD8, rSHR
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	addi	rSTR1, rSTR1, 4
#else
	lwz	rWORD1, 0(rSTR1)
#endif
	slw	rWORD2_SHIFT, rWORD8, rSHL
	or	rWORD2, r0, rWORD6_SHIFT
L(duP4e):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 4(rSTR1)
	lwz	rWORD4, 4(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
	srw	r12, rWORD4, rSHR
	slw	rWORD4_SHIFT, rWORD4, rSHL
	or	rWORD4, r12, rWORD2_SHIFT
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 8(rSTR1)
	lwz	rWORD6, 8(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
	bne	cr7, L(duLcr7)
	srw	r0, rWORD6, rSHR
	slw	rWORD6_SHIFT, rWORD6, rSHL
	or	rWORD6, r0, rWORD4_SHIFT
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwzu	rWORD7, 12(rSTR1)
	lwzu	rWORD8, 12(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	bne	cr1, L(duLcr1)
	srw	r12, rWORD8, rSHR
	slw	rWORD8_SHIFT, rWORD8, rSHL
	or	rWORD8, r12, rWORD6_SHIFT
	cmplw	cr5, rWORD7, rWORD8
	bdz	L(du24)		/* Adjust CTR as we start with +4 */
/* This is the primary loop */
	.align	4
L(duLoop):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD1, 4(rSTR1)
	lwz	rWORD2, 4(rSTR2)
#endif
	cmplw	cr1, rWORD3, rWORD4
	bne	cr6, L(duLcr6)
	srw	r0, rWORD2, rSHR
	slw	rWORD2_SHIFT, rWORD2, rSHL
	or	rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD3, 0, rSTR1
	lwbrx	rWORD4, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD3, 8(rSTR1)
	lwz	rWORD4, 8(rSTR2)
#endif
	cmplw	cr6, rWORD5, rWORD6
	bne	cr5, L(duLcr5)
	srw	r12, rWORD4, rSHR
	slw	rWORD4_SHIFT, rWORD4, rSHL
	or	rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD5, 0, rSTR1
	lwbrx	rWORD6, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD5, 12(rSTR1)
	lwz	rWORD6, 12(rSTR2)
#endif
	cmplw	cr5, rWORD7, rWORD8
	bne	cr7, L(duLcr7)
	srw	r0, rWORD6, rSHR
	slw	rWORD6_SHIFT, rWORD6, rSHL
	or	rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD7, 0, rSTR1
	lwbrx	rWORD8, 0, rSTR2
	addi	rSTR1, rSTR1, 4
	addi	rSTR2, rSTR2, 4
#else
	lwzu	rWORD7, 16(rSTR1)
	lwzu	rWORD8, 16(rSTR2)
#endif
	cmplw	cr7, rWORD1, rWORD2
	bne	cr1, L(duLcr1)
	srw	r12, rWORD8, rSHR
	slw	rWORD8_SHIFT, rWORD8, rSHL
	or	rWORD8, r12, rWORD6_SHIFT
	bdnz	L(duLoop)

L(duL4):
#if 0
/* Huh?  We've already branched on cr1!  */
	bne	cr1, L(duLcr1)
#endif
	cmplw	cr1, rWORD3, rWORD4
	bne	cr6, L(duLcr6)
	cmplw	cr6, rWORD5, rWORD6
	bne	cr5, L(duLcr5)
	cmplw	cr5, rWORD7, rWORD8
L(du44):
	bne	cr7, L(duLcr7)
L(du34):
	bne	cr1, L(duLcr1)
L(du24):
	bne	cr6, L(duLcr6)
L(du14):
	slwi.	rN, rN, 3
	bne	cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 3 bytes to compare.  We use
   shift right to eliminate bits beyond the compare length.
   This allows the use of word subtract to compute the final result.

   However it may not be safe to load rWORD2 which may be beyond the
   string length.  So we compare the bit length of the remainder to
   the right shift count (rSHR).  If the bit count is less than or equal
   we do not need to load rWORD2 (all significant bits are already in
   rWORD8_SHIFT).  */
	cmplw	cr7, rN, rSHR
	beq	L(duZeroReturn)
	li	r0, 0
	ble	cr7, L(dutrim)
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD2, 0, rSTR2
	addi	rSTR2, rSTR2, 4
#else
	lwz	rWORD2, 4(rSTR2)
#endif
	srw	r0, rWORD2, rSHR
	.align	4
L(dutrim):
#ifdef __LITTLE_ENDIAN__
	lwbrx	rWORD1, 0, rSTR1
#else
	lwz	rWORD1, 4(rSTR1)
#endif
	lwz	rWORD8, 48(r1)
	subfic	rN, rN, 32	/* Shift count is 32 - (rN * 8).  */
	or	rWORD2, r0, rWORD8_SHIFT
	lwz	rWORD7, 44(r1)
	lwz	rSHL, 40(r1)
	srw	rWORD1, rWORD1, rN
	srw	rWORD2, rWORD2, rN
	lwz	rSHR, 36(r1)
	lwz	rWORD8_SHIFT, 32(r1)
	sub	rRTN, rWORD1, rWORD2
	b	L(dureturn26)
	.align	4
L(duLcr7):
	lwz	rWORD8, 48(r1)
	lwz	rWORD7, 44(r1)
	li	rRTN, 1
	bgt	cr7, L(dureturn29)
	lwz	rSHL, 40(r1)
	lwz	rSHR, 36(r1)
	li	rRTN, -1
	b	L(dureturn27)
	.align	4
L(duLcr1):
	lwz	rWORD8, 48(r1)
	lwz	rWORD7, 44(r1)
	li	rRTN, 1
	bgt	cr1, L(dureturn29)
	lwz	rSHL, 40(r1)
	lwz	rSHR, 36(r1)
	li	rRTN, -1
	b	L(dureturn27)
	.align	4
L(duLcr6):
	lwz	rWORD8, 48(r1)
	lwz	rWORD7, 44(r1)
	li	rRTN, 1
	bgt	cr6, L(dureturn29)
	lwz	rSHL, 40(r1)
	lwz	rSHR, 36(r1)
	li	rRTN, -1
	b	L(dureturn27)
	.align	4
L(duLcr5):
	lwz	rWORD8, 48(r1)
	lwz	rWORD7, 44(r1)
	li	rRTN, 1
	bgt	cr5, L(dureturn29)
	lwz	rSHL, 40(r1)
	lwz	rSHR, 36(r1)
	li	rRTN, -1
	b	L(dureturn27)
	.align	3
L(duZeroReturn):
	li	rRTN, 0
	.align	4
L(dureturn):
	lwz	rWORD8, 48(r1)
	lwz	rWORD7, 44(r1)
L(dureturn29):
	lwz	rSHL, 40(r1)
	lwz	rSHR, 36(r1)
L(dureturn27):
	lwz	rWORD8_SHIFT, 32(r1)
L(dureturn26):
	lwz	rWORD2_SHIFT, 28(r1)
L(dureturn25):
	lwz	rWORD4_SHIFT, 24(r1)
	lwz	rWORD6_SHIFT, 20(r1)
	addi	r1, r1, 64
	cfi_adjust_cfa_offset(-64)
	blr
END (memcmp)

libc_hidden_builtin_def (memcmp)
weak_alias (memcmp, bcmp)
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcpy.S
new file mode 100644
index 0000000000..8e33c1d733
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -0,0 +1,538 @@ +/* Optimized memcpy implementation for PowerPC32/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. */ + + .machine power7 +EALIGN (memcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + stw 30,20(1) + cfi_offset(30,(20-32)) + stw 31,24(1) + mr 30,3 + cmplwi cr1,5,31 + neg 0,3 + cfi_offset(31,-8) + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 11,3,15 /* Check alignment of DST. */ + clrlwi 10,4,28 /* Check alignment of SRC. */ + cmplw cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 + bne cr6,L(copy_GE_32_unaligned) + + srwi 9,5,3 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_aligned_cont) + + clrlwi 0,0,29 + mtcrf 0x01,0 + subf 31,0,5 + + /* Get the SRC aligned to 8 bytes. */ + +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrlwi 10,12,29 /* Check alignment of SRC again. 
*/ + srwi 9,31,3 /* Number of full doublewords remaining. */ + +L(copy_GE_32_aligned_cont): + + clrlwi 11,31,29 + mtcrf 0x01,9 + + srwi 8,31,5 + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + lfd 6,0(12) + lfd 7,8(12) + addi 11,12,16 + mtctr 8 + stfd 6,0(3) + stfd 7,8(3) + addi 10,3,16 + bf 31,4f + lfd 0,16(12) + stfd 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + + .align 4 +1: /* Copy 1 doubleword and set the counter. */ + mr 10,3 + mtctr 8 + bf 31,4f + lfd 6,0(12) + addi 11,12,8 + stfd 6,0(3) + addi 10,3,8 + +L(aligned_copy): + /* Main aligned copy loop. Copies up to 128-bytes at a time. */ + .align 4 +4: + /* check for any 32-byte or 64-byte lumps that are outside of a + nice 128-byte range. R8 contains the number of 32-byte + lumps, so drop this into the CR, and use the SO/EQ bits to help + handle the 32- or 64- byte lumps. Then handle the rest with an + unrolled 128-bytes-at-a-time copy loop. */ + mtocrf 1,8 + li 6,16 # 16() index + li 7,32 # 32() index + li 8,48 # 48() index + +L(aligned_32byte): + /* if the SO bit (indicating a 32-byte lump) is not set, move along. */ + bns cr7,L(aligned_64byte) + lxvd2x 6,0,11 + lxvd2x 7,11,6 + addi 11,11,32 + stxvd2x 6,0,10 + stxvd2x 7,10,6 + addi 10,10,32 + +L(aligned_64byte): + /* if the EQ bit (indicating a 64-byte lump) is not set, move along. */ + bne cr7,L(aligned_128setup) + lxvd2x 6,0,11 + lxvd2x 7,11,6 + lxvd2x 8,11,7 + lxvd2x 9,11,8 + addi 11,11,64 + stxvd2x 6,0,10 + stxvd2x 7,10,6 + stxvd2x 8,10,7 + stxvd2x 9,10,8 + addi 10,10,64 + +L(aligned_128setup): + /* Set up for the 128-byte at a time copy loop. */ + srwi 8,31,7 + cmpwi 8,0 # Any 4x lumps left? + beq 3f # if not, move along. + lxvd2x 6,0,11 + lxvd2x 7,11,6 + mtctr 8 # otherwise, load the ctr and begin. + li 8,48 # 48() index + b L(aligned_128loop) + +L(aligned_128head): + /* for the 2nd + iteration of this loop. 
*/ + lxvd2x 6,0,11 + lxvd2x 7,11,6 +L(aligned_128loop): + lxvd2x 8,11,7 + lxvd2x 9,11,8 + stxvd2x 6,0,10 + addi 11,11,64 + stxvd2x 7,10,6 + stxvd2x 8,10,7 + stxvd2x 9,10,8 + lxvd2x 6,0,11 + lxvd2x 7,11,6 + addi 10,10,64 + lxvd2x 8,11,7 + lxvd2x 9,11,8 + addi 11,11,64 + stxvd2x 6,0,10 + stxvd2x 7,10,6 + stxvd2x 8,10,7 + stxvd2x 9,10,8 + addi 10,10,64 + bdnz L(aligned_128head) + +3: + /* Check for tail bytes. */ + clrrwi 0,31,3 + mtcrf 0x01,31 + beq cr6,0f + +.L9: + add 3,3,0 + add 12,12,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + cmplwi cr6,5,8 + mr 12,4 + mtcrf 0x01,5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + clrrwi 11,4,2 + andi. 0,8,3 + cmplwi cr1,5,16 + mr 10,5 + beq L(copy_LT_32_aligned) + + /* Force 4-bytes alignment for SRC. */ + mtocrf 0x01,0 + subf 10,0,5 +2: bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,L(end_4bytes_alignment) + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 + + .align 4 +L(end_4bytes_alignment): + cmplwi cr1,10,16 + mtcrf 0x01,10 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 8,8(12) + stw 7,4(3) + lwz 6,12(12) + addi 12,12,16 + stw 8,8(3) + stw 6,12(3) + addi 3,3,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. 
*/ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + + /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,4f + + /* Though we could've used lfd/stfd here, they are still + slow for unaligned cases. */ + + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + + /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +4: /* Copies 4~7 bytes. */ + bf 29,2b + + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + + /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +5: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,4(4) + stb 6,4(3) + +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + andi. 11,3,15 /* Check alignment of DST. */ + clrlwi 0,0,28 /* Number of bytes until the 1st + quadword of DST. */ + srwi 9,5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: /* Copy 1 byte. */ + bf 31,2f + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: /* Copy 2 bytes. */ + bf 30,4f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: /* Copy 4 bytes. 
*/ + bf 29,8f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +8: /* Copy 8 bytes. */ + bf 28,0f + + lfd 6,0(12) + addi 12,12,8 + stfd 6,0(3) + addi 3,3,8 +0: + clrlwi 10,12,28 /* Check alignment of SRC. */ + srwi 9,31,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrlwi 11,31,28 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmplwi cr1,11,0 + srwi 8,31,5 /* Setup the loop counter. */ + mr 10,3 + mr 11,12 + mtcrf 0x01,9 + cmplwi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,12 +#else + lvsl 5,0,12 +#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 + vor 3,4,4 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,11,7 /* vr3 = r11+32. */ +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 + addi 10,10,32 + + bdnz L(unaligned_loop) + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + clrrwi 0,31,4 + mtcrf 0x01,31 + beq cr1,0f + + add 3,3,0 + add 12,12,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +8: /* Copy 8 bytes. 
*/ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2~3 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return original DST pointer. */ + mr 3,30 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +END (memcpy) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/mempcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/mempcpy.S new file mode 100644 index 0000000000..1682fbcd2a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/mempcpy.S @@ -0,0 +1,482 @@ +/* Optimized mempcpy implementation for POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst' + 'len'. 
*/ + + .machine power7 +EALIGN (__mempcpy, 5, 0) + CALL_MCOUNT + + stwu 1,-32(1) + cfi_adjust_cfa_offset(32) + stw 30,20(1) + cfi_offset(30,(20-32)) + stw 31,24(1) + mr 30,3 + cmplwi cr1,5,31 + neg 0,3 + cfi_offset(31,-8) + ble cr1,L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 11,3,7 /* Check alignment of DST. */ + clrlwi 10,4,29 /* Check alignment of SRC. */ + cmplw cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 + bne cr6,L(copy_GE_32_unaligned) + + srwi 9,5,3 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_aligned_cont) + + clrlwi 0,0,29 + mtcrf 0x01,0 + subf 31,0,5 + + /* Get the SRC aligned to 8 bytes. */ + +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrlwi 10,12,29 /* Check alignment of SRC again. */ + srwi 9,31,3 /* Number of full doublewords remaining. */ + +L(copy_GE_32_aligned_cont): + + clrlwi 11,31,29 + mtcrf 0x01,9 + + srwi 8,31,5 + cmplwi cr1,9,4 + cmplwi cr6,11,0 + mr 11,12 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + lfd 6,0(12) + lfd 7,8(12) + addi 11,12,16 + mtctr 8 + stfd 6,0(3) + stfd 7,8(3) + addi 10,3,16 + bf 31,4f + lfd 0,16(12) + stfd 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + + .align 4 +1: /* Copy 1 doubleword and set the counter. */ + mr 10,3 + mtctr 8 + bf 31,4f + lfd 6,0(12) + addi 11,12,8 + stfd 6,0(3) + addi 10,3,8 + + .align 4 +4: /* Main aligned copy loop. Copies 32-bytes at a time. */ + lfd 6,0(11) + lfd 7,8(11) + lfd 8,16(11) + lfd 0,24(11) + addi 11,11,32 + + stfd 6,0(10) + stfd 7,8(10) + stfd 8,16(10) + stfd 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + /* Check for tail bytes. 
*/ + + clrrwi 0,31,3 + mtcrf 0x01,31 + beq cr6,0f + +.L9: + add 3,3,0 + add 12,12,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + cmplwi cr6,5,8 + mr 12,4 + mtcrf 0x01,5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + clrrwi 11,4,2 + andi. 0,8,3 + cmplwi cr1,5,16 + mr 10,5 + beq L(copy_LT_32_aligned) + + /* Force 4-bytes alignment for SRC. */ + mtocrf 0x01,0 + subf 10,0,5 +2: bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,L(end_4bytes_alignment) + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 + + .align 4 +L(end_4bytes_alignment): + cmplwi cr1,10,16 + mtcrf 0x01,10 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 8,8(12) + stw 7,4(3) + lwz 6,12(12) + addi 12,12,16 + stw 8,8(3) + stw 6,12(3) + addi 3,3,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + + /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handles copies of 0~8 bytes. 
*/ + .align 4 +L(copy_LE_8): + bne cr6,4f + + /* Though we could've used lfd/stfd here, they are still + slow for unaligned cases. */ + + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + + /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +4: /* Copies 4~7 bytes. */ + bf 29,2b + + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + + /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + .align 4 +5: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,4(4) + stb 6,4(3) + +0: /* Return DST + LEN pointer. */ + add 3,30,5 + lwz 30,20(1) + addi 1,1,32 + blr + + /* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + andi. 11,3,15 /* Check alignment of DST. */ + clrlwi 0,0,28 /* Number of bytes until the 1st + quadword of DST. */ + srwi 9,5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: /* Copy 1 byte. */ + bf 31,2f + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: /* Copy 2 bytes. */ + bf 30,4f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: /* Copy 4 bytes. */ + bf 29,8f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +8: /* Copy 8 bytes. */ + bf 28,0f + + lfd 6,0(12) + addi 12,12,8 + stfd 6,0(3) + addi 3,3,8 +0: + clrlwi 10,12,28 /* Check alignment of SRC. */ + srwi 9,31,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. 
*/ + clrlwi 11,31,28 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmplwi cr1,11,0 + srwi 8,31,5 /* Setup the loop counter. */ + mr 10,3 + mr 11,12 + mtcrf 0x01,9 + cmplwi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,12 +#else + lvsl 5,0,12 +#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 + vor 3,4,4 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,11,7 /* vr3 = r11+32. */ +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 + addi 10,10,32 + + bdnz L(unaligned_loop) + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + clrrwi 0,31,4 + mtcrf 0x01,31 + beq cr1,0f + + add 3,3,0 + add 12,12,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +8: /* Copy 8 bytes. */ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2~3 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. 
*/ + add 3,30,5 + lwz 30,20(1) + lwz 31,24(1) + addi 1,1,32 + blr + +END (__mempcpy) +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memrchr.S new file mode 100644 index 0000000000..eb0c1bb8eb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memrchr.S @@ -0,0 +1,196 @@ +/* Optimized memrchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ + .machine power7 +ENTRY (__memrchr) + CALL_MCOUNT + add r7,r3,r5 /* Calculate the last acceptable address. */ + neg r0,r7 + addi r7,r7,-1 + mr r10,r3 + clrrwi r6,r7,7 + li r9,3<<5 + dcbt r9,r6,16 /* Stream hint, decreasing addresses. */ + + /* Replicate BYTE to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + li r6,-4 + li r9,-1 + rlwinm r0,r0,3,27,28 /* Calculate padding. 
*/ + clrrwi r8,r7,2 + srw r9,r9,r0 + cmplwi r5,16 + clrrwi r0,r10,2 + ble L(small_range) + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 /* Load reversed word from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in WORD1. */ + and r3,r3,r9 + cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bf 29,L(loop_setup) + + /* Handle WORD2 of pair. */ +#ifdef __LITTLE_ENDIAN__ + lwzx r12,r8,r6 +#else + lwbrx r12,r8,r6 +#endif + addi r8,r8,-4 + cmpb r3,r12,r4 + cmplwi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last word we want to read in the loop below is the one + containing the first byte of the string, ie. the word at + s & ~3, or r0. The first word read is at r8 - 4, we + read 2 * cnt words, so the last word read will be at + r8 - 4 - 8 * cnt + 4. Solving for cnt gives + cnt = (r8 - r0) / 8 */ + sub r5,r8,r0 + addi r8,r8,-4 + srwi r9,r5,3 /* Number of loop iterations. */ + mtctr r9 /* Setup the counter. */ + + /* Main loop to look for BYTE backwards in the string. + FIXME: Investigate whether 32 byte align helps with this + 9 instruction loop. */ + .align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 + lwzx r11,r8,r6 +#else + lwbrx r12,0,r8 + lwbrx r11,r8,r6 +#endif + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r5,r9,r3 /* Merge everything in one word. */ + cmplwi cr7,r5,0 + bne cr7,L(found) + addi r8,r8,-8 + bdnz L(loop) + + /* We may have one more word to read. */ + cmplw r8,r0 + bnelr + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmplwi cr7,r3,0 + bne cr7,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the words contains BYTE. 
Check + the first word. */ + cmplwi cr6,r3,0 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,-4 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the + range. */ +L(done): + cntlzw r9,r3 /* Count leading zeros before the match. */ + cmplw r8,r0 /* Are we on the last word? */ + srwi r6,r9,3 /* Convert leading zeros to bytes. */ + addi r0,r6,-3 + sub r3,r8,r0 + cmplw cr7,r3,r10 + bnelr + bgelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 16. */ + .align 4 +L(small_range): + cmplwi r5,0 + beq L(null) + +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 /* Load reversed word from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in WORD1. */ + and r3,r3,r9 + cmplwi cr7,r3,0 + bne cr7,L(done) + + /* Are we done already? */ + cmplw r8,r0 + addi r8,r8,-4 + beqlr + + .align 5 +L(loop_small): +#ifdef __LITTLE_ENDIAN__ + lwzx r12,0,r8 +#else + lwbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmplw r8,r0 + cmplwi cr7,r3,0 + bne cr7,L(done) + addi r8,r8,-4 + bne L(loop_small) + blr + +END (__memrchr) +weak_alias (__memrchr, memrchr) +libc_hidden_builtin_def (memrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memset.S new file mode 100644 index 0000000000..b431f5086d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/memset.S @@ -0,0 +1,431 @@ +/* Optimized memset implementation for PowerPC32/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + + .machine power7 +EALIGN (memset, 5, 0) + CALL_MCOUNT + + .align 4 +L(_memset): + cmplwi cr7,5,31 + cmplwi cr6,5,8 + mr 10,3 /* Save original argument for later. */ + mr 7,1 /* Save original r1 for later. */ + cfi_offset(31,-8) + + /* Replicate byte to word. */ + insrwi 4,4,8,16 + insrwi 4,4,16,0 + + ble cr6,L(small) /* If length <= 8, use short copy code. */ + + neg 0,3 + ble cr7,L(medium) /* If length < 32, use medium copy code. */ + + /* Save our word twice to create a doubleword that we will later + copy to a FPR. */ + stwu 1,-32(1) + andi. 11,10,7 /* Check alignment of DST. */ + mr 12,5 + stw 4,24(1) + stw 4,28(1) + beq L(big_aligned) + + clrlwi 0,0,29 + mtocrf 0x01,0 + subf 5,0,5 + + /* Get DST aligned to 8 bytes. */ +1: bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: bf 30,4f + + sth 4,0(10) + addi 10,10,2 +4: bf 29,L(big_aligned) + + stw 4,0(10) + addi 10,10,4 + + .align 4 +L(big_aligned): + cmplwi cr5,5,255 + li 0,32 + cmplwi cr1,5,160 + dcbtst 0,10 + cmplwi cr6,4,0 + srwi 9,5,3 /* Number of full doublewords remaining. */ + crand 27,26,21 + mtocrf 0x01,9 + bt 27,L(huge) + + /* From this point on, we'll copy 32+ bytes and the value + isn't 0 (so we can't use dcbz). 
*/ + + srwi 8,5,5 + clrlwi 11,5,29 + cmplwi cr6,11,0 + cmplwi cr1,9,4 + mtctr 8 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + addi 10,10,16 + bf 31,L(big_loop) + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 + mr 12,10 + blt cr1,L(tail_bytes) + + b L(big_loop) + + .align 4 +1: /* Copy 1 doubleword. */ + bf 31,L(big_loop) + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 + + /* First use a 32-bytes loop with stw's to try and avoid the LHS due + to the lfd we will do next. Also, ping-pong through r10 and r12 + to avoid AGEN delays. */ + .align 4 +L(big_loop): + addi 12,10,32 + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + stw 4,16(10) + stw 4,20(10) + stw 4,24(10) + stw 4,28(10) + bdz L(tail_bytes) + + addi 10,10,64 + stw 4,0(12) + stw 4,4(12) + stw 4,8(12) + stw 4,12(12) + stw 4,16(12) + stw 4,20(12) + stw 4,24(12) + stw 4,28(12) + bdnz L(big_loop_fast_setup) + + mr 12,10 + b L(tail_bytes) + + /* Now that we're probably past the LHS window, use the VSX to + speed up the loop. */ +L(big_loop_fast_setup): + li 11,24 + li 6,16 + lxvdsx 4,1,11 + + .align 4 +L(big_loop_fast): + addi 12,10,32 + stxvd2x 4,0,10 + stxvd2x 4,10,6 + bdz L(tail_bytes) + + addi 10,10,64 + stxvd2x 4,0,12 + stxvd2x 4,12,6 + bdnz L(big_loop_fast) + + mr 12,10 + + .align 4 +L(tail_bytes): + + /* Check for tail bytes. */ + mr 1,7 /* Restore r1. */ + beqlr cr6 + + clrlwi 0,5,29 + mtocrf 0x01,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(12) + addi 12,12,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + sth 4,0(12) + addi 12,12,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(12) + blr + + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out 128-bytes at a time. Before using + dcbz though, we need to get the destination 128-bytes aligned. 
*/ + .align 4 +L(huge): + lfd 4,24(1) + andi. 11,10,127 + neg 0,10 + beq L(huge_aligned) + + clrlwi 0,0,25 + subf 5,0,5 + srwi 0,0,3 + mtocrf 0x01,0 + + /* Get DST aligned to 128 bytes. */ +8: bf 28,4f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + stfd 4,32(10) + stfd 4,40(10) + stfd 4,48(10) + stfd 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + stfd 4,0(10) + stfd 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(huge_aligned) + + stfd 4,0(10) + addi 10,10,8 + +L(huge_aligned): + srwi 8,5,7 + clrlwi 11,5,25 + cmplwi cr6,11,0 + mtctr 8 + + /* Copies 128-bytes at a time. */ + .align 4 +L(huge_loop): + dcbz 0,10 + addi 10,10,128 + bdnz L(huge_loop) + + /* We have a tail of 0~127 bytes to handle. */ + mr 1,7 /* Restore r1. */ + beqlr cr6 + + subf 9,3,10 + subf 5,9,12 + srwi 8,5,3 + cmplwi cr6,8,0 + mtocrf 0x01,8 + + /* We have a tail o 1~127 bytes. Copy up to 15 doublewords for + speed. We'll handle the resulting tail bytes later. */ + beq cr6,L(tail) + +8: bf 28,4f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + stfd 4,32(10) + stfd 4,40(10) + stfd 4,48(10) + stfd 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + stfd 4,0(10) + stfd 4,8(10) + stfd 4,16(10) + stfd 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + stfd 4,0(10) + stfd 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(tail) + + stfd 4,0(10) + addi 10,10,8 + + /* Handle the rest of the tail bytes here. */ +L(tail): + mtocrf 0x01,5 + + .align 4 +4: bf 29,2f + + stw 4,0(10) + addi 10,10,4 + .align 4 +2: bf 30,1f + + sth 4,0(10) + addi 10,10,2 + .align 4 +1: bflr 31 + + stb 4,0(10) + blr + + + /* Expanded tree to copy tail bytes without increments. 
*/ + .align 4 +L(copy_tail): + bf 29,L(FXX) + + stw 4,0(10) + bf 30,L(TFX) + + sth 4,4(10) + bflr 31 + + stb 4,6(10) + blr + + .align 4 +L(FXX): bf 30,L(FFX) + + sth 4,0(10) + bflr 31 + + stb 4,2(10) + blr + + .align 4 +L(TFX): bflr 31 + + stb 4,4(10) + blr + + .align 4 +L(FFX): bflr 31 + + stb 4,0(10) + blr + + /* Handle copies of 9~31 bytes. */ + .align 4 +L(medium): + /* At least 9 bytes to go. */ + andi. 11,10,3 + clrlwi 0,0,30 + beq L(medium_aligned) + + /* Force 4-bytes alignment for DST. */ + mtocrf 0x01,0 + subf 5,0,5 +1: /* Copy 1 byte. */ + bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: /* Copy 2 bytes. */ + bf 30,L(medium_aligned) + + sth 4,0(10) + addi 10,10,2 + + .align 4 +L(medium_aligned): + /* At least 6 bytes to go, and DST is word-aligned. */ + cmplwi cr1,5,16 + mtocrf 0x01,5 + blt cr1,8f + + /* Copy 16 bytes. */ + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + addi 10,10,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(10) + addi 10,10,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + sth 4,0(10) + addi 10,10,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(10) + blr + + /* Handles copies of 0~8 bytes. 
*/ + .align 4 +L(small): + mtocrf 0x01,5 + bne cr6,L(copy_tail) + + stw 4,0(10) + stw 4,4(10) + blr + +END (memset) +libc_hidden_builtin_def (memset) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/multiarch/Implies new file mode 100644 index 0000000000..22c12fd393 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power6/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/rawmemchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/rawmemchr.S new file mode 100644 index 0000000000..22edcfb209 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/rawmemchr.S @@ -0,0 +1,110 @@ +/* Optimized rawmemchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] rawmemchr (void *s [r3], int c [r4]) */ + .machine power7 +ENTRY (__rawmemchr) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + + /* Replicate byte to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + + /* Now r4 has a word of c bytes. 
*/ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r5,r12,r4 /* Compare each byte against c byte. */ +#ifdef __LITTLE_ENDIAN__ + srw r5,r5,r6 + slw r5,r5,r6 +#else + slw r5,r5,r6 /* Move left to discard ignored bits. */ + srw r5,r5,r6 /* Bring the bits back as zeros. */ +#endif + cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r5,r12,r4 + cmpwi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r5,r12,r4 + cmpb r6,r11,r4 + or r7,r5,r6 + cmpwi cr7,r7,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a 'c' byte. Check + the first word and decrement the address in case the first + word really contains a c byte. */ + + cmpwi cr6,r5,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The 'c' byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + mr r5,r6 + addi r8,r8,4 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the 'c' byte in the original + word from the string. Use that fact to find out what is + the position of the byte inside the string. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntw r0,r0 +#else + cntlzw r0,r5 /* Count leading zeros before the match. 
*/ +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching char. */ + blr +END (__rawmemchr) +weak_alias (__rawmemchr,rawmemchr) +libc_hidden_builtin_def (__rawmemchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp.S new file mode 100644 index 0000000000..964875a13b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp.S @@ -0,0 +1,129 @@ +/* Optimized strcasecmp implementation for PowerPC32. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) + + or if defined USE_IN_EXTENDED_LOCALE_MODEL: + + int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4], + __locale_t loc [r5]) */ + +#ifndef STRCMP +# define __STRCMP __strcasecmp +# define STRCMP strcasecmp +#endif + +ENTRY (__STRCMP) + +#define rRTN r3 /* Return value */ +#define rSTR1 r5 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rLOCARG r5 /* 3rd argument: locale_t */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Byte tolower(rCHAR1) */ +#define rLWR2 r12 /* Byte tolower(rCHAR2) */ +#define rTMP r0 +#define rGOT r9 /* Address of the Global Offset Table */ +#define rLOC r11 /* Default locale address */ + + cmpw cr7, r3, r4 +#ifndef USE_IN_EXTENDED_LOCALE_MODEL +# ifdef SHARED + mflr rTMP + bcl 20,31,.L1 +.L1: mflr rGOT + addis rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@ha + addi rGOT, rGOT, _GLOBAL_OFFSET_TABLE_-.L1@l + lwz rLOC, __libc_tsd_LOCALE@got@tprel(rGOT) + add rLOC, rLOC, __libc_tsd_LOCALE@tls + lwz rLOC, 0(rLOC) + mtlr rTMP +# else + lis rTMP,_GLOBAL_OFFSET_TABLE_@ha + la rLOC,_GLOBAL_OFFSET_TABLE_@l(rTMP) + lwz rLOC, __libc_tsd_LOCALE@got@tprel(rGOT) + add rLOC, rLOC, __libc_tsd_LOCALE@tls + lwz rLOC, 0(rLOC) +# endif /* SHARED */ +#else + mr rLOC, rLOCARG +#endif + mr rSTR1, rRTN + lwz rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + li rRTN, 0 + beqlr cr7 + + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. 
*/ + +L(loop): + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpwi cr7, rCHAR1, 0 /* *s1 == '\0' ? */ + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. r3, rLWR2, rLWR1 + bnelr + beqlr cr7 + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpwi cr7, rCHAR1, 0 + subf. 
r3, rLWR2, rLWR1 + bnelr + bne cr7,L(loop) + blr +END (__STRCMP) + +weak_alias (__STRCMP, STRCMP) +libc_hidden_builtin_def (__STRCMP) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S new file mode 100644 index 0000000000..c13c4ebcb8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define USE_IN_EXTENDED_LOCALE_MODEL +#define STRCMP strcasecmp_l +#define __STRCMP __strcasecmp_l + +#include "strcasecmp.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchr.S new file mode 100644 index 0000000000..75ca6acb98 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchr.S @@ -0,0 +1,225 @@ +/* Optimized strchr implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strchr (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (strchr) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + cmpwi cr7,r4,0 + lwz r12,0(r8) /* Load word from memory. 
*/ + li r0,0 /* Word with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + + /* Now r4 has a word of c bytes and r0 has + a word of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the words left and right to discard the bits that are + not part of the string and to bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + srw r11,r11,r6 + slw r10,r10,r6 + slw r11,r11,r6 +#else + slw r10,r10,r6 + slw r11,r11,r6 + srw r10,r10,r6 + srw r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpwi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r9,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r12,r10,r11 + or r9,r6,r7 + or r5,r12,r9 + cmpwi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a c/null byte. Check + the first word and decrement the address in case the first + word really contains a c/null byte. */ + + cmpwi cr6,r12,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The c/null byte must be in the second word. 
Adjust the address + again and move the result of cmpb to r10/r11 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,4 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + word from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntw r0,r3 + addi r4,r11,-1 + andc r4,r4,r11 + cmplw cr7,r3,r4 + bgt cr7,L(no_match) +#else + cntlzw r0,r10 /* Count leading zeros before c matches. */ + cmplw cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + .align 4 +L(no_match): + li r3,0 + blr + +/* We are here because strchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a word of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the words left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srw r5,r5,r6 + slw r5,r5,r6 +#else + slw r5,r5,r6 + srw r5,r5,r6 +#endif + cmpwi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop_null) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r5,r12,r0 + cmpwi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two words, compare and merge in a + single register for speed. 
This is an attempt + to speed up the null-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpwi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the words contains a null byte. Check + the first word and decrement the address in case the first + word really contains a null byte. */ + + cmpwi cr6,r5,0 + addi r8,r8,-4 + bne cr6,L(done_null) + + /* The null byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,4 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntw r0,r0 +#else + cntlzw r0,r5 /* Count leading zeros before the match. */ +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (strchr) +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchrnul.S new file mode 100644 index 0000000000..426137e11d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strchrnul.S @@ -0,0 +1,127 @@ +/* Optimized strchrnul implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strchrnul (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (__strchrnul) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + + /* Replicate byte to word. */ + insrwi r4,r4,8,16 + insrwi r4,r4,16,0 + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + li r0,0 /* Word with null chars to use + with cmpb. */ + + /* Now r4 has a word of c bytes and r0 has + a word of null bytes. */ + + cmpb r10,r12,r0 /* Compare each byte against c byte. */ + cmpb r9,r12,r4 /* Compare each byte against null byte. */ + + /* Move the words left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + srw r9,r9,r6 + slw r10,r10,r6 + slw r9,r9,r6 +#else + slw r10,r10,r6 + slw r9,r9,r6 + srw r10,r10,r6 + srw r9,r9,r6 +#endif + or r5,r9,r10 /* OR the results to speed things up. */ + cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + or r5,r9,r10 + cmpwi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. 
This is an attempt + to speed up the null-checking process for bigger strings. */ + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + cmpb r6,r11,r0 + cmpb r7,r11,r4 + or r5,r9,r10 + or r10,r6,r7 + or r11,r5,r10 + cmpwi cr7,r11,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a c/null byte. Check + the first word and decrement the address in case the first + word really contains a c/null byte. */ + + cmpwi cr6,r5,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The c/null byte must be in the second word. Adjust the address + again and move the result of cmpb to r5 so we can calculate the + pointer. */ + mr r5,r10 + addi r8,r8,4 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the c/null byte in the original + word from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntw r0,r0 +#else + cntlzw r0,r5 /* Count leading zeros before the match. */ +#endif + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of matching c/null byte. */ + blr +END (__strchrnul) +weak_alias (__strchrnul,strchrnul) +libc_hidden_builtin_def (__strchrnul) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strlen.S new file mode 100644 index 0000000000..3699791fa6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strlen.S @@ -0,0 +1,102 @@ +/* Optimized strlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strlen (char *s [r3]) */ + .machine power7 +ENTRY (strlen) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r4,r3,2 /* Align the address to word boundary. */ + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + li r0,0 /* Word with null chars to use with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + lwz r12,0(r4) /* Load word from memory. */ +#ifdef __LITTLE_ENDIAN__ + slw r5,r5,r6 +#else + srw r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in WORD1. */ + cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r4 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r4) + cmpb r10,r12,r0 + cmpwi cr7,r10,0 + bne cr7,L(done) + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + lwz r12, 4(r4) + lwzu r11, 8(r4) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r8,r9,r10 /* Merge everything in one word. */ + cmpwi cr7,r8,0 + beq cr7,L(loop) + + /* OK, one (or both) of the words contains a null byte. Check + the first word and decrement the address in case the first + word really contains a null byte. 
*/ + + cmpwi cr6,r10,0 + addi r4,r4,-4 + bne cr6,L(done) + + /* The null byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r4,r4,4 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntw r0, r9 /* Count the bits in the mask. */ +#else + cntlzw r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srwi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr +END (strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strncmp.S new file mode 100644 index 0000000000..d4598e1930 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strncmp.S @@ -0,0 +1,199 @@ +/* Optimized strcmp implementation for POWER7/PowerPC32. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +EALIGN (strncmp,5,0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + nop + or rTMP,rSTR2,rSTR1 + lis r7F7F,0x7f7f + dcbt 0,rSTR2 + nop + clrlwi. rTMP,rTMP,30 + cmplwi cr1,rN,0 + lis rFEFE,-0x101 + bne L(unaligned) +/* We are word aligned so set up for two loops. first a word + loop, then fall into the byte loop if any residual. */ + srwi. rTMP,rN,2 + clrlwi rN,rN,30 + addi rFEFE,rFEFE,-0x101 + addi r7F7F,r7F7F,0x7f7f + cmplwi cr1,rN,0 + beq L(unaligned) + + mtctr rTMP + lwz rWORD1,0(rSTR1) + lwz rWORD2,0(rSTR2) + b L(g1) + +L(g0): + lwzu rWORD1,4(rSTR1) + bne cr1,L(different) + lwzu rWORD2,4(rSTR2) +L(g1): add rTMP,rFEFE,rWORD1 + nor rNEG,r7F7F,rWORD1 + bdz L(tail) + and. rTMP,rTMP,rNEG + cmpw cr1,rWORD1,rWORD2 + beq L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + slwi rTMP, rTMP, 1 + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. 
*/ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rldimi rTMP2, rWORD2, 24, 32 + rldimi rTMP, rWORD1, 24, 32 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzw rBITDIF,rBITDIF + cntlzw rNEG,rNEG + addi rNEG,rNEG,7 + cmpw cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + bgelr cr1 +L(equal): + li rRTN,0 + blr + +L(different): + lwz rWORD1,-4(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + bgelr +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP,rTMP,rNEG + cmpw cr1,rWORD1,rWORD2 + bne L(endstring) + addi rSTR1,rSTR1,4 + bne cr1,L(different) + addi rSTR2,rSTR2,4 + cmplwi cr1,rN,0 +L(unaligned): + mtctr rN + ble cr1,L(ux) +L(uz): + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + .align 4 +L(u1): + cmpwi cr1,rWORD1,0 + bdz L(u4) + cmpw rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpwi cr1,rWORD3,0 + bdz L(u3) + cmpw rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + cmpwi cr1,rWORD1,0 + bdz L(u4) + cmpw rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpwi cr1,rWORD3,0 + bdz L(u3) + cmpw rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + b L(u1) + +L(u3): sub rRTN,rWORD3,rWORD4 + blr +L(u4): sub rRTN,rWORD1,rWORD2 + blr +L(ux): + li rRTN,0 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strnlen.S new file mode 100644 index 0000000000..6019d5be5b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power7/strnlen.S @@ -0,0 +1,176 @@ +/* Optimized strnlen implementation for PowerPC32/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strnlen (char *s [r3], int size [r4]) */ + .machine power7 +ENTRY (__strnlen) + CALL_MCOUNT + dcbt 0,r3 + clrrwi r8,r3,2 /* Align the address to word boundary. */ + add r7,r3,r4 /* Calculate the last acceptable address. */ + cmplwi r4,16 + li r0,0 /* Word with null chars. */ + addi r7,r7,-1 + ble L(small_range) + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + slw r10,r10,r6 +#else + slw r10,r10,r6 + srw r10,r10,r6 +#endif + cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + clrrwi r7,r7,2 /* Address of last word. */ + mtcrf 0x01,r8 + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 29,L(loop_setup) + + /* Handle WORD2 of pair. */ + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmplwi cr7,r10,0 + bne cr7,L(done) + +L(loop_setup): + /* The last word we want to read in the loop below is the one + containing the last byte of the string, ie. the word at + (s + size - 1) & ~3, or r7. The first word read is at + r8 + 4, we read 2 * cnt words, so the last word read will + be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives + cnt = (r7 - r8) / 8 */ + sub r5,r7,r8 + srwi r6,r5,3 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for the null byte in the string. Since + it's a small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two words, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. 
*/ + + lwz r12,4(r8) + lwzu r11,8(r8) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r5,r9,r10 /* Merge everything in one word. */ + cmplwi cr7,r5,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more word to read. */ + cmplw cr6,r8,r7 + beq cr6,L(end_max) + + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmplwi cr6,r10,0 + bne cr6,L(done) + +L(end_max): + mr r3,r4 + blr + + /* OK, one (or both) of the words contains a null byte. Check + the first word and decrement the address in case the first + word really contains a null byte. */ + .align 4 +L(found): + cmplwi cr6,r10,0 + addi r8,r8,-4 + bne cr6,L(done) + + /* The null byte must be in the second word. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r8,r8,4 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + word from the string. Use that to calculate the length. + We need to make sure the null char is *before* the end of the + range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r10,-1 + andc r0,r0,r10 + popcntw r0,r0 +#else + cntlzw r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 + srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmplw r3,r4 + blelr + mr r3,r4 + blr + +/* Deals with size <= 16. */ + .align 4 +L(small_range): + cmplwi r4,0 + beq L(end_max) + + clrrwi r7,r7,2 /* Address of last word. */ + + rlwinm r6,r3,3,27,28 /* Calculate padding. */ + lwz r12,0(r8) /* Load word from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in WORD1. 
*/ +#ifdef __LITTLE_ENDIAN__ + srw r10,r10,r6 + slw r10,r10,r6 +#else + slw r10,r10,r6 + srw r10,r10,r6 +#endif + cmplwi cr7,r10,0 + bne cr7,L(done) + + cmplw r8,r7 + beq L(end_max) + + .p2align 5 +L(loop_small): + lwzu r12,4(r8) + cmpb r10,r12,r0 + cmplwi cr6,r10,0 + bne cr6,L(done) + cmplw r8,r7 + bne L(loop_small) + mr r3,r4 + blr + +END (__strnlen) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_builtin_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power8/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/Implies new file mode 100644 index 0000000000..083f3e950a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power7/fpu +powerpc/powerpc32/power7 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power8/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/fpu/multiarch/Implies new file mode 100644 index 0000000000..43a3b83e2a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power7/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power8/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/multiarch/Implies new file mode 100644 index 0000000000..f18504408f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power8/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power7/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power9/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/Implies new file mode 100644 index 0000000000..066dea2798 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc32/power8/fpu +powerpc/powerpc32/power8 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies new file mode 100644 index 0000000000..4393b56872 --- /dev/null +++ 
b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power8/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/power9/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/multiarch/Implies new file mode 100644 index 0000000000..1a46ef0035 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/power9/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc32/power8/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/ppc-mcount.S b/REORG.TODO/sysdeps/powerpc/powerpc32/ppc-mcount.S new file mode 100644 index 0000000000..8a6b205c37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/ppc-mcount.S @@ -0,0 +1,104 @@ +/* PowerPC-specific implementation of profiling support. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This would be bad. */ +#ifdef PROF +#undef PROF +#endif + +#include <sysdep.h> + +/* We do profiling as described in the SYSV ELF ABI, except that glibc + _mcount manages its own counters. The caller has put the address the + caller will return to in the usual place on the stack, 4(r1). _mcount + is responsible for ensuring that when it returns no argument-passing + registers are disturbed, and that the LR is set back to (what the + caller sees as) 4(r1). 
+ + This is intended so that the following code can be inserted at the + front of any routine without changing the routine: + + .data + mflr r0 + stw r0,4(r1) + bl _mcount +*/ + +ENTRY(_mcount) +#if defined PIC && !defined SHARED +# define CALLER_LR_OFFSET 68 + stwu r1,-64(r1) + cfi_adjust_cfa_offset (64) + stw r30, 48(r1) + cfi_rel_offset (r30, 48) +#else +# define CALLER_LR_OFFSET 52 + stwu r1,-48(r1) + cfi_adjust_cfa_offset (48) +#endif +/* We need to save the parameter-passing registers. */ + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r4 +#if defined PIC && !defined SHARED + bcl 20,31,0f +0: + mflr r30 + addis r30, r30, _GLOBAL_OFFSET_TABLE_-0b@ha + addi r30, r30, _GLOBAL_OFFSET_TABLE_-0b@l +#endif + lwz r3, CALLER_LR_OFFSET(r1) + mfcr r5 + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r4, 44(r1) + cfi_rel_offset (lr, 44) + stw r5, 8(r1) +#ifndef SHARED + bl JUMPTARGET(__mcount_internal) +#else + bl __mcount_internal@local +#endif + /* Restore the registers... */ + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcrf 0xff,r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, CALLER_LR_OFFSET(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + /* ...unwind the stack frame, and return to your usual programming. */ +#if defined PIC && !defined SHARED + lwz r30, 48(r1) + addi r1,r1,64 +#else + addi r1,r1,48 +#endif + bctr +END(_mcount) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/register-dump.h b/REORG.TODO/sysdeps/powerpc/powerpc32/register-dump.h new file mode 100644 index 0000000000..6e533a75a3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/register-dump.h @@ -0,0 +1,120 @@ +/* Dump registers. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/uio.h> +#include <_itoa.h> + +/* This prints out the information in the following form: */ +static const char dumpform[] = "\ +Register dump:\n\ +fp0-3: 0000030%0000031% 0000032%0000033% 0000034%0000035% 0000036%0000037%\n\ +fp4-7: 0000038%0000039% 000003a%000003b% 000003c%000003d% 000003e%000003f%\n\ +fp8-11: 0000040%0000041% 0000042%0000043% 0000044%0000045% 0000046%0000047%\n\ +fp12-15: 0000048%0000049% 000004a%000004b% 000004c%000004d% 000004e%000004f%\n\ +fp16-19: 0000050%0000051% 0000052%0000053% 0000054%0000055% 0000056%0000057%\n\ +fp20-23: 0000058%0000059% 000005a%000005b% 000005c%000005d% 000005e%000005f%\n\ +fp24-27: 0000060%0000061% 0000062%0000063% 0000064%0000065% 0000066%0000067%\n\ +fp28-31: 0000068%0000069% 000006a%000006b% 000006c%000006d% 000006e%000006f%\n\ +r0 =0000000% sp =0000001% r2 =0000002% r3 =0000003% trap=0000028%\n\ +r4 =0000004% r5 =0000005% r6 =0000006% r7 =0000007% sr0=0000020% sr1=0000021%\n\ +r8 =0000008% r9 =0000009% r10=000000a% r11=000000b% dar=0000029% dsi=000002a%\n\ +r12=000000c% r13=000000d% r14=000000e% r15=000000f% r3*=0000022%\n\ +r16=0000010% r17=0000011% r18=0000012% r19=0000013%\n\ +r20=0000014% r21=0000015% r22=0000016% r23=0000017% lr=0000024% xer=0000025%\n\ +r24=0000018% r25=0000019% r26=000001a% r27=000001b% mq=0000027% 
ctr=0000023%\n\ +r28=000001c% r29=000001d% r30=000001e% r31=000001f% fscr=0000071% ccr=0000026%\n\ +"; + +/* Most of the fields are self-explanatory. 'sr0' is the next + instruction to execute, from SRR0, which may have some relationship + with the instruction that caused the exception. 'r3*' is the value + that will be returned in register 3 when the current system call + returns. 'sr1' is SRR1, bits 16-31 of which are copied from the MSR: + + 16 - External interrupt enable + 17 - Privilege level (1=user, 0=supervisor) + 18 - FP available + 19 - Machine check enable (if clear, processor locks up on machine check) + 20 - FP exception mode bit 0 (FP exceptions recoverable) + 21 - Single-step trace enable + 22 - Branch trace enable + 23 - FP exception mode bit 1 + 25 - exception prefix (if set, exceptions are taken from 0xFFFnnnnn, + otherwise from 0x000nnnnn). + 26 - Instruction address translation enabled. + 27 - Data address translation enabled. + 30 - Exception is recoverable (otherwise, don't try to return). + 31 - Little-endian mode enable. + + 'Trap' is the address of the exception: + + 00200 - Machine check exception (memory parity error, for instance) + 00300 - Data access exception (memory not mapped, see dsisr for why) + 00400 - Instruction access exception (memory not mapped) + 00500 - External interrupt + 00600 - Alignment exception (see dsisr for more information) + 00700 - Program exception (illegal/trap instruction, FP exception) + 00800 - FP unavailable (should not be seen by user code) + 00900 - Decrementer exception (for instance, SIGALRM) + 00A00 - I/O controller interface exception + 00C00 - System call exception (for instance, kill(3)). + 00E00 - FP assist exception (optional FP instructions, etc.) + + 'dar' is the memory location, for traps 00300, 00400, 00600, 00A00. 
+ 'dsisr' has the following bits under trap 00300: + 0 - direct-store error exception + 1 - no page table entry for page + 4 - memory access not permitted + 5 - trying to access I/O controller space or using lwarx/stwcx on + non-write-cached memory + 6 - access was store + 9 - data access breakpoint hit + 10 - segment table search failed to find translation (64-bit ppcs only) + 11 - I/O controller instruction not permitted + For trap 00400, the same bits are set in SRR1 instead. + For trap 00600, bits 12-31 of the DSISR set to allow emulation of + the instruction without actually having to read it from memory. +*/ + +#define xtoi(x) (x >= 'a' ? x + 10 - 'a' : x - '0') + +static void +register_dump (int fd, struct sigcontext *ctx) +{ + char buffer[sizeof(dumpform)]; + char *bufferpos; + unsigned regno; + unsigned *regs = (unsigned *)(ctx->regs); + + memcpy(buffer, dumpform, sizeof(dumpform)); + + /* Generate the output. */ + while ((bufferpos = memchr (buffer, '%', sizeof(dumpform)))) + { + regno = xtoi (bufferpos[-1]) | xtoi (bufferpos[-2]) << 4; + memset (bufferpos-2, '0', 3); + _itoa_word (regs[regno], bufferpos+1, 16, 0); + } + + /* Write the output. */ + write (fd, buffer, sizeof(buffer) - 1); +} + + +#define REGISTER_DUMP \ + register_dump (fd, ctx) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/rshift.S b/REORG.TODO/sysdeps/powerpc/powerpc32/rshift.S new file mode 100644 index 0000000000..58abdcad59 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/rshift.S @@ -0,0 +1,55 @@ +/* Shift a limb right, low level routine. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* INPUT PARAMETERS + res_ptr r3 + s1_ptr r4 + size r5 + cnt r6 */ + +ENTRY (__mpn_rshift) + mtctr r5 # copy size into CTR + addi r7,r3,-4 # move adjusted res_ptr to free return reg + subfic r8,r6,32 + lwz r11,0(r4) # load first s1 limb + slw r3,r11,r8 # compute function return value + bdz L(1) + +L(0): lwzu r10,4(r4) + srw r9,r11,r6 + slw r12,r10,r8 + or r9,r9,r12 + stwu r9,4(r7) + bdz L(2) + lwzu r11,4(r4) + srw r9,r10,r6 + slw r12,r11,r8 + or r9,r9,r12 + stwu r9,4(r7) + bdnz L(0) + +L(1): srw r0,r11,r6 + stw r0,4(r7) + blr + +L(2): srw r0,r10,r6 + stw r0,4(r7) + blr +END (__mpn_rshift) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc32/rtld-memset.c new file mode 100644 index 0000000000..f3ed8ad1e7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/rtld-memset.c @@ -0,0 +1,4 @@ +/* PPCA2 has a different cache-line size than the usual 128 bytes. To avoid + using code that assumes cache-line size to be 128 bytes (with dcbz + instructions) we use the generic code instead. */ +#include <string/memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp-common.S new file mode 100644 index 0000000000..c74c492cec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp-common.S @@ -0,0 +1,78 @@ +/* setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + +#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT) +# define SAVE_GP(N) evstdd r##N,((JB_FPRS+((N)-14)*2)*4)(3) +#else +# define SAVE_GP(N) stw r##N,((JB_GPRS+(N)-14)*4)(3) +#endif + +ENTRY (__sigsetjmp_symbol) + +#ifdef PTR_MANGLE + mr r5,r1 + PTR_MANGLE(r5, r10) + stw r5,(JB_GPR1*4)(3) +#else + stw r1,(JB_GPR1*4)(3) +#endif + mflr r0 + /* setjmp probe expects longjmp first argument (4@3), second argument + (-4@4), and target address (4@0), respectively. 
*/ + LIBC_PROBE (setjmp, 3, 4@3, -4@4, 4@0) + SAVE_GP (14) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r10) + li r10,0 +#endif + stw r0,(JB_LR*4)(3) + SAVE_GP (15) + mfcr r0 + SAVE_GP (16) + stw r0,(JB_CR*4)(3) + SAVE_GP (17) + SAVE_GP (18) + SAVE_GP (19) + SAVE_GP (20) + SAVE_GP (21) + SAVE_GP (22) + SAVE_GP (23) + SAVE_GP (24) + SAVE_GP (25) + SAVE_GP (26) + SAVE_GP (27) + SAVE_GP (28) + SAVE_GP (29) + SAVE_GP (30) + SAVE_GP (31) +#if IS_IN (rtld) + li r3,0 + blr +#else + b __sigjmp_save_symbol@local +#endif +END (__sigsetjmp_symbol) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp.S new file mode 100644 index 0000000000..2800466276 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/setjmp.S @@ -0,0 +1,46 @@ +/* non altivec (old) version of setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <shlib-compat.h> +#include <libc-symbols.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define __sigsetjmp_symbol __sigsetjmp +# define __sigjmp_save_symbol __sigjmp_save +# include "setjmp-common.S" + +#else /* IS_IN (libc) */ +/* Build a versioned object for libc. 
*/ +versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) +# define __sigsetjmp_symbol __vmx__sigsetjmp +# define __sigjmp_save_symbol __vmx__sigjmp_save +# include "setjmp-common.S" +libc_hidden_ver (__vmx__sigsetjmp, __sigsetjmp) + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +# define __NO_VMX__ +# undef __sigsetjmp_symbol +# undef __sigjmp_save_symbol +# undef JB_SIZE +compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0) +# define __sigsetjmp_symbol __novmx__sigsetjmp +# define __sigjmp_save_symbol __novmx__sigjmp_save +# include "setjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/stackguard-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc32/stackguard-macros.h new file mode 100644 index 0000000000..b3d0af830f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/stackguard-macros.h @@ -0,0 +1,14 @@ +#include <stdint.h> + +#define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("lwz %0,-28680(2)" : "=r" (x)); x; }) + +#define POINTER_CHK_GUARD \ + ({ \ + uintptr_t x; \ + asm ("lwz %0,%1(2)" \ + : "=r" (x) \ + : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \ + ); \ + x; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/start.S b/REORG.TODO/sysdeps/powerpc/powerpc32/start.S new file mode 100644 index 0000000000..d510a56c0f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/start.S @@ -0,0 +1,95 @@ +/* Startup code for programs linked with GNU libc. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* We do not want .eh_frame info for crt1.o since crt1.o is linked + before crtbegin.o, the file defining __EH_FRAME_BEGIN__. */ +#undef cfi_startproc +#define cfi_startproc +#undef cfi_endproc +#define cfi_endproc + + /* These are the various addresses we require. */ +#ifdef PIC + .section ".data" +#else + .section ".rodata" +#endif + .align 2 +L(start_addresses): + .long _SDA_BASE_ + .long main + .long __libc_csu_init + .long __libc_csu_fini + ASM_SIZE_DIRECTIVE(L(start_addresses)) + + .section ".text" +ENTRY(_start) + /* Save the stack pointer, in case we're statically linked under Linux. 
*/ + mr r9,r1 + /* Set up an initial stack frame, and clear the LR. */ + clrrwi r1,r1,4 +#ifdef PIC + SETUP_GOT_ACCESS(r13,got_label) + li r0,0 +#else + li r0,0 +#endif + stwu r1,-16(r1) + mtlr r0 + stw r0,0(r1) + /* Set r13 to point at the 'small data area', and put the address of + start_addresses in r8. Also load the GOT pointer so that new PLT + calls work, like the one to __libc_start_main. */ +#ifdef PIC + addis r30,r13,_GLOBAL_OFFSET_TABLE_-got_label@ha + addis r8,r13,L(start_addresses)-got_label@ha + addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l + lwzu r13, L(start_addresses)-got_label@l(r8) +#else + lis r8,L(start_addresses)@ha + lwzu r13,L(start_addresses)@l(r8) +#endif + /* and continue in libc-start, in glibc. */ + b JUMPTARGET(__libc_start_main) +END(_start) + +/* Define a symbol for the first piece of initialized data. */ + .section ".data" + .globl __data_start +__data_start: + .long 0 +weak_alias (__data_start, data_start) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/stpcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/stpcpy.S new file mode 100644 index 0000000000..6ef249f80d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/stpcpy.S @@ -0,0 +1,119 @@ +/* Optimized stpcpy implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ + +EALIGN (__stpcpy, 4, 0) + +#define rTMP r0 +#define rRTN r3 +#define rDEST r3 /* pointer to previous word in dest */ +#define rSRC r4 /* pointer to previous word in src */ +#define rWORD r6 /* current word from src */ +#define rFEFE r7 /* 0xfefefeff */ +#define r7F7F r8 /* 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ +#define rALT r10 /* alternate word from src */ + + + or rTMP, rSRC, rDEST + clrlwi. rTMP, rTMP, 30 + addi rDEST, rDEST, -4 + bne L(unaligned) + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g2) + +L(g0): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rDEST) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g1) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rDEST) +L(g2): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stbu rALT, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stbu rTMP, 1(rDEST) + blr +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stbu rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stbu rTMP, 1(rDEST) + beqlr- + stbu rALT, 1(rDEST) + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte copy. 
*/ + .align 4 + nop +L(unaligned): + lbz rWORD, 0(rSRC) + addi rDEST, rDEST, 3 + cmpwi rWORD, 0 + beq- L(u2) + +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rDEST) + cmpwi rALT, 0 + beq- L(u1) + nop /* Let 601 load start of loop. */ + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rDEST) + cmpwi rWORD, 0 + bne+ L(u0) +L(u2): stbu rWORD, 1(rDEST) + blr +L(u1): stbu rALT, 1(rDEST) + blr +END (__stpcpy) + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strchr.S new file mode 100644 index 0000000000..868cbd46aa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strchr.S @@ -0,0 +1,146 @@ +/* Optimized strchr implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how this works. 
*/ + +/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */ + +ENTRY (strchr) + +#define rTMP1 r0 +#define rRTN r3 /* outgoing result */ +#define rSTR r8 /* current word pointer */ +#define rCHR r4 /* byte we're looking for, spread over the whole word */ +#define rWORD r5 /* the current word */ +#define rCLZB rCHR /* leading zero byte count */ +#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f */ +#define rTMP2 r9 +#define rIGN r10 /* number of bits we should ignore in the first word */ +#define rMASK r11 /* mask with the bits to ignore set to 0 */ +#define rTMP3 r12 +#define rTMP4 rIGN +#define rTMP5 rMASK + + + rlwimi rCHR, rCHR, 8, 16, 23 + li rMASK, -1 + rlwimi rCHR, rCHR, 16, 0, 15 + rlwinm rIGN, rRTN, 3, 27, 28 + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + clrrwi rSTR, rRTN, 2 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f +/* Test the first (partial?) word. */ + lwz rWORD, 0(rSTR) +#ifdef __LITTLE_ENDIAN__ + slw rMASK, rMASK, rIGN +#else + srw rMASK, rMASK, rIGN +#endif + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD + and. rTMP4, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + +/* The loop. */ + +L(loop): + lwzu rWORD, 4(rSTR) + and. rTMP5, rTMP1, rTMP2 +/* Test for 0. */ + add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ + nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ + bne L(foundit) + and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ +/* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD +L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) + +/* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a + zero byte, in which case we return a match. */ + and. 
rTMP5, rTMP1, rTMP2 + li rRTN, 0 + beqlr +/* At this point: + rTMP5 bytes are 0x80 for each match of c, 0 otherwise. + rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. + But there may be false matches in the next most significant byte from + a true match due to carries. This means we need to recalculate the + matches using a longer method for big-endian. */ +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzw rCLZB, rTMP1 + addi rTMP2, rTMP4, -1 + andc rTMP2, rTMP2, rTMP4 + cmplw rTMP1, rTMP2 + bgtlr + subfic rCLZB, rCLZB, 32-7 +#else +/* I think we could reduce this by two instructions by keeping the "nor" + results from the loop for reuse here. See strlen.S tail. Similarly + one instruction could be pruned from L(foundit). */ + and rFEFE, r7F7F, rWORD + or rTMP5, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F + nor rWORD, rTMP5, rFEFE + nor rTMP2, rTMP4, rTMP1 + cntlzw rCLZB, rTMP2 + cmplw rWORD, rTMP2 + bgtlr +#endif + srwi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + blr + +L(foundit): +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzw rCLZB, rTMP1 + subfic rCLZB, rCLZB, 32-7-32 + srawi rCLZB, rCLZB, 3 +#else + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F + nor rTMP2, rTMP4, rTMP1 + cntlzw rCLZB, rTMP2 + subi rSTR, rSTR, 4 + srwi rCLZB, rCLZB, 3 +#endif + add rRTN, rSTR, rCLZB + blr +END (strchr) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strcmp.S new file mode 100644 index 0000000000..52c4bbc1f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strcmp.S @@ -0,0 +1,150 @@ +/* Optimized strcmp implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + +EALIGN (strcmp, 4, 0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rWORD1 r5 /* current word in s1 */ +#define rWORD2 r6 /* current word in s2 */ +#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r10 /* bits that differ in s1 & s2 words */ +#define rTMP r11 + + + or rTMP, rSTR2, rSTR1 + clrlwi. rTMP, rTMP, 30 + lis rFEFE, -0x101 + bne L(unaligned) + + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) + lis r7F7F, 0x7f7f + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g1) + +L(g0): lwzu rWORD1, 4(rSTR1) + bne cr1, L(different) + lwzu rWORD2, 4(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... 
*/ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + rlwimi rTMP2, rTMP2, 1, 0, 30 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF + cntlzw rNEG, rNEG + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + bgelr+ cr1 +L(equal): + li rRTN, 0 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + bgelr+ +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. 
*/ + .align 4 +L(unaligned): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + b L(u1) + +L(u0): lbzu rWORD1, 1(rSTR1) + bne- L(u4) + lbzu rWORD2, 1(rSTR2) +L(u1): cmpwi cr1, rWORD1, 0 + beq- cr1, L(u3) + cmpw rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpwi cr1, rWORD1, 0 + cmpw rWORD1, rWORD2 + bne+ cr1, L(u0) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +L(u4): lbz rWORD1, -1(rSTR1) + sub rRTN, rWORD1, rWORD2 + blr +END (strcmp) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strcpy.S new file mode 100644 index 0000000000..c7af830dda --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strcpy.S @@ -0,0 +1,117 @@ +/* Optimized strcpy implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. 
*/ + +/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ + +EALIGN (strcpy, 4, 0) + +#define rTMP r0 +#define rRTN r3 /* incoming DEST arg preserved as result */ +#define rSRC r4 /* pointer to previous word in src */ +#define rDEST r5 /* pointer to previous word in dest */ +#define rWORD r6 /* current word from src */ +#define rFEFE r7 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rALT r10 /* alternate word from src */ + + + or rTMP, rSRC, rRTN + clrlwi. rTMP, rTMP, 30 + addi rDEST, rRTN, -4 + bne L(unaligned) + + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + lwz rWORD, 0(rSRC) + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + b L(g2) + +L(g0): lwzu rALT, 4(rSRC) + stwu rWORD, 4(rDEST) + add rTMP, rFEFE, rALT + nor rNEG, r7F7F, rALT + and. rTMP, rTMP, rNEG + bne- L(g1) + lwzu rWORD, 4(rSRC) + stwu rALT, 4(rDEST) +L(g2): add rTMP, rFEFE, rWORD + nor rNEG, r7F7F, rWORD + and. rTMP, rTMP, rNEG + beq+ L(g0) + + mr rALT, rWORD +/* We've hit the end of the string. Do the rest byte-by-byte. */ +L(g1): +#ifdef __LITTLE_ENDIAN__ + rlwinm. rTMP, rALT, 0, 24, 31 + stb rALT, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stb rTMP, 5(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stb rTMP, 6(rDEST) + beqlr- + rlwinm rTMP, rALT, 8, 24, 31 + stb rTMP, 7(rDEST) + blr +#else + rlwinm. rTMP, rALT, 8, 24, 31 + stb rTMP, 4(rDEST) + beqlr- + rlwinm. rTMP, rALT, 16, 24, 31 + stb rTMP, 5(rDEST) + beqlr- + rlwinm. rTMP, rALT, 24, 24, 31 + stb rTMP, 6(rDEST) + beqlr- + stb rALT, 7(rDEST) + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte copy. */ + .align 4 + nop +L(unaligned): + lbz rWORD, 0(rSRC) + addi rDEST, rRTN, -1 + cmpwi rWORD, 0 + beq- L(u2) + +L(u0): lbzu rALT, 1(rSRC) + stbu rWORD, 1(rDEST) + cmpwi rALT, 0 + beq- L(u1) + nop /* Let 601 load start of loop. 
*/ + lbzu rWORD, 1(rSRC) + stbu rALT, 1(rDEST) + cmpwi rWORD, 0 + bne+ L(u0) +L(u2): stb rWORD, 1(rDEST) + blr +L(u1): stb rALT, 1(rDEST) + blr + +END (strcpy) +libc_hidden_builtin_def (strcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strlen.S new file mode 100644 index 0000000000..fa245f0760 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strlen.S @@ -0,0 +1,190 @@ +/* Optimized strlen implementation for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. 
You get 0x80 in bytes that + match, but possibly false 0x80 matches in the next more significant + byte to a true match due to carries. For little-endian this is + of no consequence since the least significant match is the one + we're interested in, but big-endian needs method 2 to find which + byte matches. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). + This produces 0x80 in each byte that was zero, and 0x00 in all + the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each + byte, and the '| x' part ensures that bytes with the high bit set + produce 0x00. The addition will carry into the high bit of each byte + iff that byte had one of its low 7 bits set. We can then just see + which was the most significant bit set and divide by 8 to find how + many to add to the index. + This is from the book 'The PowerPC Compiler Writer's Guide', + by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + + We deal with strings not aligned to a word boundary by taking the + first word and ensuring that bytes not part of the string + are treated as nonzero. To allow for memory latency, we unroll the + loop a few times, being careful to ensure that we do not read ahead + across cache line boundaries. + + Questions to answer: + 1) How long are strings passed to strlen? If they're often really long, + we should probably use cache management instructions and/or unroll the + loop more. If they're often quite short, it might be better to use + fact (2) in the inner loop than have to recalculate it. + 2) How popular are bytes with the high bit set? If they are very rare, + on some processors it might be useful to use the simpler expression + ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one + ALU), but this fails when any character has its high bit set. 
*/ + +/* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without + saving them. We can also use registers 14 through 31 if we save them. + We can't use r1 (it's the stack pointer), r2 nor r13 because the user + program may expect them to hold their usual value if we get sent + a signal. Integer parameters are passed in r3 through r10. + We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving + them, the others we must save. */ + +/* int [r3] strlen (char *s [r3]) */ + +ENTRY (strlen) + +#define rTMP4 r0 +#define rRTN r3 /* incoming STR arg, outgoing result */ +#define rSTR r4 /* current string position */ +#define rPADN r5 /* number of padding bits we prepend to the + string to make it start at a word boundary */ +#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f */ +#define rWORD1 r8 /* current string word */ +#define rWORD2 r9 /* next string word */ +#define rMASK r9 /* mask for first string word */ +#define rTMP1 r10 +#define rTMP2 r11 +#define rTMP3 r12 + + + clrrwi rSTR, rRTN, 2 + lis r7F7F, 0x7f7f + rlwinm rPADN, rRTN, 3, 27, 28 + lwz rWORD1, 0(rSTR) + li rMASK, -1 + addi r7F7F, r7F7F, 0x7f7f +/* We use method (2) on the first two words, because rFEFE isn't + required which reduces setup overhead. Also gives a faster return + for small strings on big-endian due to needing to recalculate with + method (2) anyway. */ +#ifdef __LITTLE_ENDIAN__ + slw rMASK, rMASK, rPADN +#else + srw rMASK, rMASK, rPADN +#endif + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor rTMP3, rTMP2, rTMP1 + and. rTMP3, rTMP3, rMASK + mtcrf 0x01, rRTN + bne L(done0) + lis rFEFE, -0x101 + addi rFEFE, rFEFE, -0x101 +/* Are we now aligned to a doubleword boundary? */ + bt 29, L(loop) + +/* Handle second word of pair. */ +/* Perhaps use method (1) here for little-endian, saving one instruction? 
*/ + lwzu rWORD1, 4(rSTR) + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor. rTMP3, rTMP2, rTMP1 + bne L(done0) + +/* The loop. */ + +L(loop): + lwz rWORD1, 4(rSTR) + lwzu rWORD2, 8(rSTR) + add rTMP1, rFEFE, rWORD1 + nor rTMP2, r7F7F, rWORD1 + and. rTMP1, rTMP1, rTMP2 + add rTMP3, rFEFE, rWORD2 + nor rTMP4, r7F7F, rWORD2 + bne L(done1) + and. rTMP3, rTMP3, rTMP4 + beq L(loop) + +#ifndef __LITTLE_ENDIAN__ + and rTMP1, r7F7F, rWORD2 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP4, rTMP1 + b L(done0) + +L(done1): + and rTMP1, r7F7F, rWORD1 + subi rSTR, rSTR, 4 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP2, rTMP1 + +/* When we get to here, rSTR points to the first word in the string that + contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, + and 0x00 otherwise. */ +L(done0): + cntlzw rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + srwi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#else + +L(done0): + addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */ + andc rTMP1, rTMP1, rTMP3 + cntlzw rTMP1, rTMP1 /* Count bits not in the mask. */ + subf rTMP3, rRTN, rSTR + subfic rTMP1, rTMP1, 32-7 + srwi rTMP1, rTMP1, 3 + add rRTN, rTMP1, rTMP3 + blr + +L(done1): + addi rTMP3, rTMP1, -1 + andc rTMP3, rTMP3, rTMP1 + cntlzw rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + subfic rTMP3, rTMP3, 32-7-32 + srawi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#endif + +END (strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc32/strncmp.S new file mode 100644 index 0000000000..dadc90d661 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/strncmp.S @@ -0,0 +1,181 @@ +/* Optimized strcmp implementation for PowerPC32. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +EALIGN (strncmp, 4, 0) + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrlwi. rTMP, rTMP, 30 + cmplwi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are word aligned so set up for two loops. first a word + loop, then fall into the byte loop if any residual. */ + srwi. rTMP, rN, 2 + clrlwi rN, rN, 30 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmplwi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. 
*/ + lwz rWORD1, 0(rSTR1) + lwz rWORD2, 0(rSTR2) + b L(g1) + +L(g0): + lwzu rWORD1, 4(rSTR1) + bne- cr1, L(different) + lwzu rWORD2, 4(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + slwi rTMP, rTMP, 1 + addi rTMP2, rTMP, -1 + andc rTMP2, rTMP2, rTMP + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */ + rlwinm rTMP, rWORD1, 8, 0xffffffff + rlwimi rTMP2, rWORD2, 24, 0, 7 + rlwimi rTMP, rWORD1, 24, 0, 7 + rlwimi rTMP2, rWORD2, 24, 16, 23 + rlwimi rTMP, rWORD1, 24, 16, 23 + xor. rBITDIF, rTMP, rTMP2 + sub rRTN, rTMP, rTMP2 + bgelr+ + ori rRTN, rTMP2, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzw rBITDIF, rBITDIF + cntlzw rNEG, rNEG + addi rNEG, rNEG, 7 + cmpw cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + bgelr+ cr1 +L(equal): + li rRTN, 0 + blr + +L(different): + lwz rWORD1, -4(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + bgelr+ +L(highbit): + ori rRTN, rWORD2, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP, rTMP, rNEG + cmpw cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 4 + bne- cr1, L(different) + addi rSTR2, rSTR2, 4 + cmplwi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(uz) +L(ux): + li rRTN, 0 + blr + .align 4 +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + nop + b L(u1) +L(u0): + lbzu rWORD2, 1(rSTR2) +L(u1): + bdz L(u3) + cmpwi cr1, rWORD1, 0 + cmpw rWORD1, rWORD2 + beq- cr1, L(u3) + lbzu rWORD1, 1(rSTR1) + bne- L(u2) + lbzu rWORD2, 1(rSTR2) + bdz L(u3) + cmpwi cr1, rWORD1, 0 + cmpw rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + bne+ cr1, L(u0) + +L(u2): lbzu rWORD1, -1(rSTR1) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +END (strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/sub_n.S b/REORG.TODO/sysdeps/powerpc/powerpc32/sub_n.S new file mode 100644 index 0000000000..659f348079 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/sub_n.S @@ -0,0 +1,68 @@ +/* Subtract two limb vectors of equal, non-zero length for PowerPC. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, + mp_size_t size) + Calculate s1-s2 and put result in res_ptr; return borrow, 0 or 1. */ + +/* Note on optimisation: This code is optimal for the 601. Almost every other + possible 2-unrolled inner loop will not be. Also, watch out for the + alignment... */ + +EALIGN (__mpn_sub_n, 3, 1) + +/* Set up for loop below. */ + mtcrf 0x01,r6 + srwi. r7,r6,1 + mtctr r7 + bt 31,L(2) + +/* Set the carry (clear the borrow). */ + subfc r0,r0,r0 +/* Adjust pointers for loop. */ + addi r3,r3,-4 + addi r4,r4,-4 + addi r5,r5,-4 + b L(0) + +L(2): lwz r7,0(r5) + lwz r6,0(r4) + subfc r6,r7,r6 + stw r6,0(r3) + beq L(1) + +/* Align start of loop to an odd word boundary to guarantee that the + last two words can be fetched in one access (for 601). This turns + out to be important. */ +L(0): + lwz r9,4(r4) + lwz r8,4(r5) + lwzu r6,8(r4) + lwzu r7,8(r5) + subfe r8,r8,r9 + stw r8,4(r3) + subfe r6,r7,r6 + stwu r6,8(r3) + bdnz L(0) +/* Return the borrow. */ +L(1): subfe r3,r3,r3 + neg r3,r3 + blr +END (__mpn_sub_n) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/submul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc32/submul_1.S new file mode 100644 index 0000000000..c1183c40f9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/submul_1.S @@ -0,0 +1,51 @@ +/* Multiply a limb vector by a single limb, for PowerPC. + Copyright (C) 1993-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* mp_limb_t mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, + mp_size_t s1_size, mp_limb_t s2_limb) + Calculate res-s1*s2 and put result back in res; return carry. */ + +ENTRY (__mpn_submul_1) + mtctr r5 + + lwz r0,0(r4) + mullw r7,r0,r6 + mulhwu r10,r0,r6 + lwz r9,0(r3) + subf r8,r7,r9 + addc r7,r7,r8 # invert cy (r7 is junk) + addi r3,r3,-4 # adjust res_ptr + bdz L(1) + +L(0): lwzu r0,4(r4) + stwu r8,4(r3) + mullw r8,r0,r6 + adde r7,r8,r10 + mulhwu r10,r0,r6 + lwz r9,4(r3) + addze r10,r10 + subf r8,r7,r9 + addc r7,r7,r8 # invert cy (r7 is junk) + bdnz L(0) + +L(1): stw r8,4(r3) + addze r3,r10 + blr +END (__mpn_submul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/symbol-hacks.h b/REORG.TODO/sysdeps/powerpc/powerpc32/symbol-hacks.h new file mode 100644 index 0000000000..dbb3141621 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/symbol-hacks.h @@ -0,0 +1,21 @@ +/* Hacks needed for symbol manipulation. powerpc version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/wordsize-32/divdi3-symbol-hacks.h> + +#include_next "symbol-hacks.h" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/sysdep.h b/REORG.TODO/sysdeps/powerpc/powerpc32/sysdep.h new file mode 100644 index 0000000000..f92ab2cded --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/sysdep.h @@ -0,0 +1,174 @@ +/* Assembly macros for 32-bit PowerPC. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/sysdep.h> + +#ifdef __ASSEMBLER__ + +/* If compiled for profiling, call `_mcount' at the start of each + function. */ +#ifdef PROF +/* The mcount code relies on a the return address being on the stack + to locate our caller and so it can restore it; so store one just + for its benefit. */ +# define CALL_MCOUNT \ + mflr r0; \ + stw r0,4(r1); \ + cfi_offset (lr, 4); \ + bl JUMPTARGET(_mcount); +#else /* PROF */ +# define CALL_MCOUNT /* Do nothing. */ +#endif /* PROF */ + +#define ENTRY(name) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align ALIGNARG(2); \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT + +/* helper macro for accessing the 32-bit powerpc GOT. 
*/ + +#define SETUP_GOT_ACCESS(regname,GOT_LABEL) \ + bcl 20,31,GOT_LABEL ; \ +GOT_LABEL: ; \ + mflr (regname) + +#define EALIGN_W_0 /* No words to insert. */ +#define EALIGN_W_1 nop +#define EALIGN_W_2 nop;nop +#define EALIGN_W_3 nop;nop;nop +#define EALIGN_W_4 EALIGN_W_3;nop +#define EALIGN_W_5 EALIGN_W_4;nop +#define EALIGN_W_6 EALIGN_W_5;nop +#define EALIGN_W_7 EALIGN_W_6;nop + +/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^align boundary. */ +#ifdef PROF +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align ALIGNARG(2); \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT \ + b 0f; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + 0: +#else /* PROF */ +# define EALIGN(name, alignt, words) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ + C_LABEL(name) \ + cfi_startproc; +#endif + +#undef END +#define END(name) \ + cfi_endproc; \ + ASM_SIZE_DIRECTIVE(name) + +#if ! IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) +# define ABORT_TRANSACTION \ + cmpwi 2,0; \ + beq 1f; \ + lwz 0,TM_CAPABLE(2); \ + cmpwi 0,0; \ + beq 1f; \ + li 11,_ABORT_SYSCALL; \ + tabort. 
11; \ + .align 4; \ +1: +#else +# define ABORT_TRANSACTION +#endif + +#define DO_CALL(syscall) \ + ABORT_TRANSACTION \ + li 0,syscall; \ + sc + +#undef JUMPTARGET +#ifdef PIC +# define JUMPTARGET(name) name##@plt +#else +# define JUMPTARGET(name) name +#endif + +#if defined SHARED && defined PIC && !defined NO_HIDDEN +# undef HIDDEN_JUMPTARGET +# define HIDDEN_JUMPTARGET(name) __GI_##name##@local +#endif + +#define PSEUDO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET \ + bnslr+; \ + b __syscall_error@local +#define ret PSEUDO_RET + +#undef PSEUDO_END +#define PSEUDO_END(name) \ + END (name) + +#define PSEUDO_NOERRNO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_NOERRNO \ + blr +#define ret_NOERRNO PSEUDO_RET_NOERRNO + +#undef PSEUDO_END_NOERRNO +#define PSEUDO_END_NOERRNO(name) \ + END (name) + +#define PSEUDO_ERRVAL(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_ERRVAL \ + blr +#define ret_ERRVAL PSEUDO_RET_ERRVAL + +#undef PSEUDO_END_ERRVAL +#define PSEUDO_END_ERRVAL(name) \ + END (name) + +/* Local labels stripped out by the linker. */ +#undef L +#define L(x) .L##x + +#define XGLUE(a,b) a##b +#define GLUE(a,b) XGLUE (a,b) +#define GENERATE_GOT_LABEL(name) GLUE (.got_label, name) + +/* Label in text section. */ +#define C_TEXT(name) name + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/tls-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc32/tls-macros.h new file mode 100644 index 0000000000..ee0eac4858 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/tls-macros.h @@ -0,0 +1,49 @@ +/* Include sysdeps/powerpc/tls-macros.h for __TLS_CALL_CLOBBERS */ +#include_next "tls-macros.h" + +/* PowerPC32 Local Exec TLS access. 
*/ +#define TLS_LE(x) \ + ({ int *__result; \ + asm ("addi %0,2," #x "@tprel" \ + : "=r" (__result)); \ + __result; }) + +/* PowerPC32 Initial Exec TLS access. */ +#define TLS_IE(x) \ + ({ int *__result; \ + asm ("bcl 20,31,1f\n1:\t" \ + "mflr %0\n\t" \ + "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \ + "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \ + "lwz %0," #x "@got@tprel(%0)\n\t" \ + "add %0,%0," #x "@tls" \ + : "=b" (__result) : \ + : "lr"); \ + __result; }) + +/* PowerPC32 Local Dynamic TLS access. */ +#define TLS_LD(x) \ + ({ int *__result; \ + asm ("bcl 20,31,1f\n1:\t" \ + "mflr 3\n\t" \ + "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \ + "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \ + "addi 3,3," #x "@got@tlsld\n\t" \ + "bl __tls_get_addr@plt\n\t" \ + "addi %0,3," #x "@dtprel" \ + : "=r" (__result) : \ + : "3", __TLS_CALL_CLOBBERS); \ + __result; }) + +/* PowerPC32 General Dynamic TLS access. */ +#define TLS_GD(x) \ + ({ register int *__result __asm__ ("r3"); \ + asm ("bcl 20,31,1f\n1:\t" \ + "mflr 3\n\t" \ + "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \ + "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \ + "addi 3,3," #x "@got@tlsgd\n\t" \ + "bl __tls_get_addr@plt" \ + : "=r" (__result) : \ + : __TLS_CALL_CLOBBERS); \ + __result; }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc32/tst-audit.h b/REORG.TODO/sysdeps/powerpc/powerpc32/tst-audit.h new file mode 100644 index 0000000000..2e5e0c91d5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc32/tst-audit.h @@ -0,0 +1,25 @@ +/* Definitions for testing PLT entry/exit auditing. PowerPC32 version. + + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define pltenter la_ppc32_gnu_pltenter +#define pltexit la_ppc32_gnu_pltexit +#define La_regs La_ppc32_regs +#define La_retval La_ppc32_retval +#define int_retval lrv_r3 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies new file mode 100644 index 0000000000..bedb20b65c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/970/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power4/fpu +powerpc/powerpc64/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/Implies new file mode 100644 index 0000000000..a8cae95f9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/Implies @@ -0,0 +1 @@ +wordsize-64 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile new file mode 100644 index 0000000000..9d15db0328 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/Makefile @@ -0,0 +1,49 @@ +# Powerpc64 specific build options. +# this is ./sysdeps/powerpc/powerpc64/Makefile + +# Each TOC entry takes 8 bytes and the TOC holds up to 2^16 bytes, +# or 8192 entries. +# If -fpic is not specified, the latest gcc-3.2.1 now generates +# different code for call stubs (without the TOC reload). +# Shared objects need the TOC reload so specify -fpic. +ifeq (yes,$(build-shared)) +pic-ccflag = -fpic +endif + +# These flags prevent FPU or Altivec registers from being used, +# for code called in contexts that is not allowed to touch those registers. +# Stupid GCC requires us to pass all these ridiculous switches. 
We need to +# pass the -mno-* switches as well to prevent the compiler from attempting +# to emit altivec or vsx instructions, especially when the registers aren't +# available. +no-special-regs := $(sort $(foreach n,40 41 50 51 60 61 62 63 \ + $(foreach m,2 3 4 5 6 7 8 9, \ + 3$m 4$m 5$m),\ + -ffixed-$n)) \ + $(sort $(foreach n,$(foreach m,0 1 2 3 4 5 6 7 8 9,\ + $m 1$m 2$m) 30 31,\ + -ffixed-v$n)) \ + -ffixed-vrsave -ffixed-vscr -mno-altivec -mno-vsx + +# Need to prevent gcc from using fprs in code used during dynamic linking. + +CFLAGS-dl-runtime.os = $(no-special-regs) +CFLAGS-dl-lookup.os = $(no-special-regs) +CFLAGS-dl-misc.os = $(no-special-regs) +CFLAGS-rtld-mempcpy.os = $(no-special-regs) +CFLAGS-rtld-memmove.os = $(no-special-regs) +CFLAGS-rtld-memchr.os = $(no-special-regs) +CFLAGS-rtld-strnlen.os = $(no-special-regs) + +ifeq ($(subdir),elf) +# help gcc inline asm code from dl-machine.h ++cflags += -finline-limit=2000 +endif + +ifeq ($(subdir),gmon) +# The assembly functions assume that fp arg regs are not trashed. +# Compiling with -msoft-float ensures that fp regs are not used +# for moving memory around. +CFLAGS-mcount.c += $(no-special-regs) +sysdep_routines += ppc-mcount +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S new file mode 100644 index 0000000000..efda025b41 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -0,0 +1,183 @@ +/* longjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#define _SETJMP_H +#ifdef __NO_VMX__ +# include <novmxsetjmp.h> +#else +# include <jmpbuf-offsets.h> +#endif + +#ifndef __NO_VMX__ + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" +#endif + + .machine "altivec" +ENTRY (__longjmp) + CALL_MCOUNT 2 +#ifndef __NO_VMX__ + ld r5,.LC__dl_hwcap@toc(r2) +# ifdef SHARED + /* Load _rtld-global._dl_hwcap. */ + ld r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r5) +# else + ld r5,0(r5) /* Load extern _dl_hwcap. */ +# endif + andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf + lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. 
*/ + mtspr VRSAVE,r0 + beq+ L(aligned_restore_vmx) + addi r6,r5,16 + lvsl v0,0,r5 + lvx v1,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + vperm v20,v1,v21,v0 +# define load_misaligned_vmx_lo_loaded(loadvr,lovr,shiftvr,loadgpr,addgpr) \ + addi addgpr,addgpr,32; \ + lvx lovr,0,loadgpr; \ + vperm loadvr,loadvr,lovr,shiftvr; + load_misaligned_vmx_lo_loaded(v21,v22,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v22,v23,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v23,v24,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v24,v25,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v25,v26,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v26,v27,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v27,v28,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v28,v29,v0,r6,r5) + load_misaligned_vmx_lo_loaded(v29,v30,v0,r5,r6) + load_misaligned_vmx_lo_loaded(v30,v31,v0,r6,r5) + lvx v1,0,r5 + vperm v31,v31,v1,v0 + b L(no_vmx) +L(aligned_restore_vmx): + addi r6,r5,16 + lvx v20,0,r5 + addi r5,r5,32 + lvx v21,0,r6 + addi r6,r6,32 + lvx v22,0,r5 + addi r5,r5,32 + lvx v23,0,r6 + addi r6,r6,32 + lvx v24,0,r5 + addi r5,r5,32 + lvx v25,0,r6 + addi r6,r6,32 + lvx v26,0,r5 + addi r5,r5,32 + lvx v27,0,r6 + addi r6,r6,32 + lvx v28,0,r5 + addi r5,r5,32 + lvx v29,0,r6 + addi r6,r6,32 + lvx v30,0,r5 + lvx v31,0,r6 +L(no_vmx): +#endif +#if defined PTR_DEMANGLE || defined CHECK_SP + ld r22,(JB_GPR1*8)(r3) +#else + ld r1,(JB_GPR1*8)(r3) +#endif +#ifdef PTR_DEMANGLE +# ifdef CHECK_SP + PTR_DEMANGLE3 (r22, r22, r25) +# else + PTR_DEMANGLE3 (r1, r22, r25) +# endif +#endif +#ifdef CHECK_SP + CHECK_SP (r22) + mr r1,r22 +#endif + ld r2,(JB_GPR2*8)(r3) + ld r0,(JB_LR*8)(r3) + ld r14,((JB_GPRS+0)*8)(r3) + lfd fp14,((JB_FPRS+0)*8)(r3) +#if defined SHARED && !IS_IN (rtld) + std r2,FRAME_TOC_SAVE(r1) /* Restore the callers TOC save area. 
*/ +#endif + ld r15,((JB_GPRS+1)*8)(r3) + lfd fp15,((JB_FPRS+1)*8)(r3) + ld r16,((JB_GPRS+2)*8)(r3) + lfd fp16,((JB_FPRS+2)*8)(r3) + ld r17,((JB_GPRS+3)*8)(r3) + lfd fp17,((JB_FPRS+3)*8)(r3) + ld r18,((JB_GPRS+4)*8)(r3) + lfd fp18,((JB_FPRS+4)*8)(r3) + ld r19,((JB_GPRS+5)*8)(r3) + lfd fp19,((JB_FPRS+5)*8)(r3) + ld r20,((JB_GPRS+6)*8)(r3) + lfd fp20,((JB_FPRS+6)*8)(r3) +#ifdef PTR_DEMANGLE + PTR_DEMANGLE2 (r0, r25) +#endif + /* longjmp/longjmp_target probe expects longjmp first argument (8@3), + second argument (-4@4), and target address (8@0), respectively. */ + LIBC_PROBE (longjmp, 3, 8@3, -4@4, 8@0) + mtlr r0 +/* std r2,FRAME_TOC_SAVE(r1) Restore the TOC save area. */ + ld r21,((JB_GPRS+7)*8)(r3) + lfd fp21,((JB_FPRS+7)*8)(r3) + ld r22,((JB_GPRS+8)*8)(r3) + lfd fp22,((JB_FPRS+8)*8)(r3) + lwz r5,((JB_CR*8)+4)(r3) /* 32-bit CR. */ + ld r23,((JB_GPRS+9)*8)(r3) + lfd fp23,((JB_FPRS+9)*8)(r3) + ld r24,((JB_GPRS+10)*8)(r3) + lfd fp24,((JB_FPRS+10)*8)(r3) + ld r25,((JB_GPRS+11)*8)(r3) + lfd fp25,((JB_FPRS+11)*8)(r3) + mtcrf 0xFF,r5 + ld r26,((JB_GPRS+12)*8)(r3) + lfd fp26,((JB_FPRS+12)*8)(r3) + ld r27,((JB_GPRS+13)*8)(r3) + lfd fp27,((JB_FPRS+13)*8)(r3) + ld r28,((JB_GPRS+14)*8)(r3) + lfd fp28,((JB_FPRS+14)*8)(r3) + ld r29,((JB_GPRS+15)*8)(r3) + lfd fp29,((JB_FPRS+15)*8)(r3) + ld r30,((JB_GPRS+16)*8)(r3) + lfd fp30,((JB_FPRS+16)*8)(r3) + ld r31,((JB_GPRS+17)*8)(r3) + lfd fp31,((JB_FPRS+17)*8)(r3) + LIBC_PROBE (longjmp_target, 3, 8@3, -4@4, 8@0) + mr r3,r4 + blr +END (__longjmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S new file mode 100644 index 0000000000..78659d012f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/__longjmp.S @@ -0,0 +1,39 @@ +/* AltiVec/VMX (new) version of __longjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# include "__longjmp-common.S" + +#else /* IS_IN (libc) */ +strong_alias (__vmx__longjmp, __longjmp) +# define __longjmp __vmx__longjmp +# include "__longjmp-common.S" + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) +# define __NO_VMX__ +# undef __longjmp +# undef JB_SIZE +# define __longjmp __novmx__longjmp +# include "__longjmp-common.S" +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S new file mode 100644 index 0000000000..ff30898df5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/a2/memcpy.S @@ -0,0 +1,528 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Michael Brutman <brutman@us.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define PREFETCH_AHEAD 4 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ + + .section ".toc","aw" +.LC0: + .tc __cache_line_size[TC],__cache_line_size + .section ".text" + .align 2 + + + .machine a2 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmpldi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 /* Copy dest reg to r6; */ + blt+ cr1,L(shortcopy) + + + /* Big copy (16 bytes or more) + + Figure out how far to the nearest quadword boundary, or if we are + on one already. Also get the cache line size. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + neg r8,r3 /* LS 4 bits = # bytes to 8-byte dest bdry */ + ld r9,.LC0@toc(r2) /* Get cache line size (part 1) */ + clrldi r8,r8,64-4 /* align to 16byte boundary */ + sub r7,r4,r3 /* compute offset to src from dest */ + lwz r9,0(r9) /* Get cache line size (part 2) */ + cmpldi cr0,r8,0 /* Were we aligned on a 16 byte bdy? */ + addi r10,r9,-1 /* Cache line mask */ + beq+ L(dst_aligned) + + + + /* Destination is not aligned on quadword boundary. Get us to one. 
+ + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + r7 - offset to src from dest + r8 - number of bytes to quadword boundary + */ + + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 /* adjust remaining len */ + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte addr */ + stb r0,0(r6) + addi r6,r6,1 +1: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte addr */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte addr */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte addr */ + std r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 /* update src addr */ + + + + /* Dest is quadword aligned now. + + Lots of decisions to make. If we are copying less than a cache + line we won't be here long. If we are not on a cache line + boundary we need to get there. And then we need to figure out + how many cache lines ahead to pre-touch. + + r3 - return value (always) + r4 - current source addr + r5 - copy length + r6 - current dest addr + */ + + + .align 4 +L(dst_aligned): + cmpdi cr0,r9,0 /* Cache line size set? */ + bne+ cr0,L(cachelineset) + +/* __cache_line_size not set: generic byte copy without much optimization */ + clrldi. r0,r5,63 /* If length is odd copy one byte */ + beq L(cachelinenotset_align) + lbz r7,0(r4) /* Read one byte from source */ + addi r5,r5,-1 /* Update length */ + addi r4,r4,1 /* Update source pointer address */ + stb r7,0(r6) /* Store one byte at dest */ + addi r6,r6,1 /* Update dest pointer address */ +L(cachelinenotset_align): + cmpdi cr7,r5,0 /* If length is 0 return */ + beqlr cr7 + ori r2,r2,0 /* Force a new dispatch group */ +L(cachelinenotset_loop): + addic. 
r5,r5,-2 /* Update length */ + lbz r7,0(r4) /* Load 2 bytes from source */ + lbz r8,1(r4) + addi r4,r4,2 /* Update source pointer address */ + stb r7,0(r6) /* Store 2 bytes on dest */ + stb r8,1(r6) + addi r6,r6,2 /* Update dest pointer address */ + bne L(cachelinenotset_loop) + blr + + +L(cachelineset): + cmpd cr5,r5,r10 /* Less than a cacheline to go? */ + + neg r7,r6 /* How far to next cacheline bdy? */ + + addi r6,r6,-8 /* prepare for stdu */ + cmpdi cr0,r9,128 + addi r4,r4,-8 /* prepare for ldu */ + + + ble+ cr5,L(lessthancacheline) + + beq- cr0,L(big_lines) /* 128 byte line code */ + + + + /* More than a cacheline left to go, and using 64 byte cachelines */ + + clrldi r7,r7,64-6 /* How far to next cacheline bdy? */ + + cmpldi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srdi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srdi r10,r5,6 + + cmpldi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmpldi r10,PREFETCH_AHEAD + li r12,64+8 /* prefetch distance */ + ble L(lessthanmaxprefetch) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. */ + +L(prefetchSRC): + dcbt r12,r4 + addi r12,r12,64 + bdnz L(prefetchSRC) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? 
+ r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch): + mtctr r7 + + cmpldi cr1,r5,64 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrldi r5,r5,64-6 + + beq cr6,L(cachelinealigned) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline): + ld r9,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz L(aligntocacheline) + + + .align 4 +L(cachelinealigned): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <64 */ + +L(outerloop): + cmpdi r11,0 + mtctr r11 + beq- L(endloop) + + li r11,64*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + addi r4, r4,0x40 + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + stdu r0, 0x40(r6) + + bdnz L(loop) + + +L(endloop): + cmpdi r10,0 + beq- L(endloop2) + mtctr r10 + +L(loop2): /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + addi r4, r4,0x40 + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + stdu r0, 0x40(r6) + + bdnz L(loop2) +L(endloop2): + + + .align 4 +L(lessthancacheline): /* Was there less than cache to do ? 
*/ + cmpldi cr0,r5,16 + srdi r7,r5,4 /* divide size by 16 */ + blt- L(do_lt16) + mtctr r7 + +L(copy_remaining): + ld r8,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r8,0x08(r6) + stdu r7,0x10(r6) + bdnz L(copy_remaining) + +L(do_lt16): /* less than 16 ? */ + cmpldi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +L(shortcopy): /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: + blr + + + + + + /* Similar to above, but for use with 128 byte lines. */ + + +L(big_lines): + + clrldi r7,r7,64-7 /* How far to next cacheline bdy? */ + + cmpldi cr6,r7,0 /* Are we on a cacheline bdy already? */ + + /* Reduce total len by what it takes to get to the next cache line */ + subf r5,r7,r5 + srdi r7,r7,4 /* How many qws to get to the line bdy? */ + + /* How many full cache lines to copy after getting to a line bdy? */ + srdi r10,r5,7 + + cmpldi r10,0 /* If no full cache lines to copy ... */ + li r11,0 /* number cachelines to copy with prefetch */ + beq L(nocacheprefetch_128) + + + /* We are here because we have at least one full cache line to copy, + and therefore some pre-touching to do. */ + + cmpldi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble L(lessthanmaxprefetch_128) + + /* We can only do so much pre-fetching. R11 will have the count of + lines left to prefetch after the initial batch of prefetches + are executed. */ + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +L(lessthanmaxprefetch_128): + mtctr r10 + + /* At this point r10/ctr hold the number of lines to prefetch in this + initial batch, and r11 holds any remainder. 
*/ + +L(prefetchSRC_128): + dcbt r12,r4 + addi r12,r12,128 + bdnz L(prefetchSRC_128) + + + /* Prefetching is done, or was not needed. + + cr6 - are we on a cacheline boundary already? + r7 - number of quadwords to the next cacheline boundary + */ + +L(nocacheprefetch_128): + mtctr r7 + + cmpldi cr1,r5,128 /* Less than a cache line to copy? */ + + /* How many bytes are left after we copy whatever full + cache lines we can get? */ + clrldi r5,r5,64-7 + + beq cr6,L(cachelinealigned_128) + + + /* Copy quadwords up to the next cacheline boundary */ + +L(aligntocacheline_128): + ld r9,0x08(r4) + ld r7,0x10(r4) + addi r4,r4,0x10 + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz L(aligntocacheline_128) + + +L(cachelinealigned_128): /* copy while cache lines */ + + blt- cr1,L(lessthancacheline) /* size <128 */ + +L(outerloop_128): + cmpdi r11,0 + mtctr r11 + beq- L(endloop_128) + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist */ + + .align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +L(loop_128): /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ld r0, 0x80(r4) + addi r4, r4,0x80 + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz L(loop_128) + + +L(endloop_128): + cmpdi r10,0 + beq- L(endloop2_128) + mtctr r10 + +L(loop2_128): /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 
0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ld r0, 0x80(r4) + addi r4, r4,0x80 + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz L(loop2_128) +L(endloop2_128): + + b L(lessthancacheline) + + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S new file mode 100644 index 0000000000..b4b052141d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/addmul_1.S @@ -0,0 +1,208 @@ +/* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add + the result to a second limb vector. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#ifdef USE_AS_SUBMUL +# define FUNC __mpn_submul_1 +# define ADDSUBC subfe +# define ADDSUB subfc +#else +# define FUNC __mpn_addmul_1 +# define ADDSUBC adde +# define ADDSUB addc +#endif + +#define RP r3 +#define UP r4 +#define N r5 +#define VL r6 + +EALIGN(FUNC, 5, 0) + std r31, -8(r1) + rldicl. r0, N, 0, 62 + std r30, -16(r1) + cmpdi VL, r0, 2 + std r29, -24(r1) + addi N, N, 3 + std r28, -32(r1) + srdi N, N, 2 + std r27, -40(r1) + mtctr N + beq cr0, L(b00) + blt cr6, L(b01) + beq cr6, L(b10) + +L(b11): ld r9, 0(UP) + ld r28, 0(RP) + mulld r0, r9, VL + mulhdu r12, r9, VL + ADDSUB r0, r0, r28 + std r0, 0(RP) + addi RP, RP, 8 + ld r9, 8(UP) + ld r27, 16(UP) + addi UP, UP, 24 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + + .align 4 +L(b00): ld r9, 0(UP) + ld r27, 8(UP) + ld r28, 0(RP) + ld r29, 8(RP) + mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + addc r7, r7, N + addze r12, r8 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + addi RP, RP, 16 + ld r9, 16(UP) + ld r27, 24(UP) + addi UP, UP, 32 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + + .align 4 +L(b01): bdnz L(gt1) + ld r9, 0(UP) + ld r11, 0(RP) + mulld r0, r9, VL + mulhdu r8, r9, VL + ADDSUB r0, r0, r11 + std r0, 0(RP) +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 + addic r11, r11, 1 +#endif + addze RP, r8 + blr + +L(gt1): ld r9, 0(UP) + ld r27, 8(UP) + mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r9, 16(UP) + ld r28, 0(RP) + ld r29, 8(RP) + ld r30, 16(RP) + mulld r11, r9, VL + mulhdu r10, r9, VL + addc r7, r7, N + adde r11, r11, r8 + addze r12, r10 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + ADDSUBC r11, r11, r30 + std r11, 16(RP) + addi RP, RP, 24 + ld r9, 24(UP) + ld r27, 32(UP) + addi UP, UP, 40 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + b L(bot) + +L(b10): addic r0, r0, r0 + li r12, 0 + ld r9, 0(UP) + ld 
r27, 8(UP) + bdz L(end) + addi UP, UP, 16 + + .align 4 +L(top): mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r9, 0(UP) + ld r28, 0(RP) + ld r27, 8(UP) + ld r29, 8(RP) + adde r0, r0, r12 + adde r7, r7, N + mulld N, r9, VL + mulhdu r10, r9, VL + mulld r11, r27, VL + mulhdu r12, r27, VL + ld r9, 16(UP) + ld r30, 16(RP) + ld r27, 24(UP) + ld r31, 24(RP) + adde N, N, r8 + adde r11, r11, r10 + addze r12, r12 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) + ADDSUBC N, N, r30 + std N, 16(RP) + ADDSUBC r11, r11, r31 + std r11, 24(RP) + addi UP, UP, 32 +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 +#endif + addi RP, RP, 32 +L(bot): +#ifdef USE_AS_SUBMUL + addic r11, r11, 1 +#endif + bdnz L(top) + +L(end): mulld r0, r9, VL + mulhdu N, r9, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + ld r28, 0(RP) + ld r29, 8(RP) + adde r0, r0, r12 + adde r7, r7, N + addze r8, r8 + ADDSUB r0, r0, r28 + std r0, 0(RP) + ADDSUBC r7, r7, r29 + std r7, 8(RP) +#ifdef USE_AS_SUBMUL + subfe r11, r11, r11 + addic r11, r11, 1 +#endif + addze RP, r8 + ld r31, -8(r1) + ld r30, -16(r1) + ld r29, -24(r1) + ld r28, -32(r1) + ld r27, -40(r1) + blr +END(FUNC) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h new file mode 100644 index 0000000000..46df488b3c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/atomic-machine.h @@ -0,0 +1,242 @@ +/* Atomic operations. PowerPC64 version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Paul Mackerras <paulus@au.ibm.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* POWER6 adds a "Mutex Hint" to the Load and Reserve instruction. + This is a hint to the hardware to expect additional updates adjacent + to the lock word or not. If we are acquiring a Mutex, the hint + should be true. Otherwise we releasing a Mutex or doing a simple + atomic operation. In that case we don't expect additional updates + adjacent to the lock word after the Store Conditional and the hint + should be false. */ + +#if defined _ARCH_PWR6 || defined _ARCH_PWR6X +# define MUTEX_HINT_ACQ ",1" +# define MUTEX_HINT_REL ",0" +#else +# define MUTEX_HINT_ACQ +# define MUTEX_HINT_REL +#endif + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 0 +#define ATOMIC_EXCHANGE_USES_CAS 1 + +/* The 32-bit exchange_bool is different on powerpc64 because the subf + does signed 64-bit arithmetic while the lwarx is 32-bit unsigned + (a load word and zero (high 32) form) load. + In powerpc64 register values are 64-bit by default, including oldval. + The value in old val unknown sign extension, lwarx loads the 32-bit + value as unsigned. So we explicitly clear the high 32 bits in oldval. */ +#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \ +({ \ + unsigned int __tmp, __tmp2; \ + __asm __volatile (" clrldi %1,%1,32\n" \ + "1: lwarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%1,%0\n" \ + " bne 2f\n" \ + " stwcx. 
%4,0,%2\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp), "=r" (__tmp2) \ + : "b" (mem), "1" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +/* + * Only powerpc64 processors support Load doubleword and reserve index (ldarx) + * and Store doubleword conditional indexed (stdcx) instructions. So here + * we define the 64-bit forms. + */ +#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \ +({ \ + unsigned long __tmp; \ + __asm __volatile ( \ + "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " subf. %0,%2,%0\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (mem), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp != 0; \ +}) + +#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile ( \ + "1: ldarx %0,0,%1" MUTEX_HINT_ACQ "\n" \ + " cmpd %0,%2\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_compare_and_exchange_val_64_rel(mem, newval, oldval) \ + ({ \ + __typeof (*(mem)) __tmp; \ + __typeof (mem) __memp = (mem); \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%1" MUTEX_HINT_REL "\n" \ + " cmpd %0,%2\n" \ + " bne 2f\n" \ + " stdcx. %3,0,%1\n" \ + " bne- 1b\n" \ + "2: " \ + : "=&r" (__tmp) \ + : "b" (__memp), "r" (oldval), "r" (newval) \ + : "cr0", "memory"); \ + __tmp; \ + }) + +#define __arch_atomic_exchange_64_acq(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%2" MUTEX_HINT_ACQ "\n" \ + " stdcx. 
%3,0,%2\n" \ + " bne- 1b\n" \ + " " __ARCH_ACQ_INSTR \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_64_rel(mem, value) \ + ({ \ + __typeof (*mem) __val; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%2" MUTEX_HINT_REL "\n" \ + " stdcx. %3,0,%2\n" \ + " bne- 1b" \ + : "=&r" (__val), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64_acq(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3" MUTEX_HINT_ACQ "\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b\n" \ + __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_exchange_and_add_64_rel(mem, value) \ + ({ \ + __typeof (*mem) __val, __tmp; \ + __asm __volatile (__ARCH_REL_INSTR "\n" \ + "1: ldarx %0,0,%3" MUTEX_HINT_REL "\n" \ + " add %1,%0,%4\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b" \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "r" (value), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_increment_val_64(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: ldarx %0,0,%2\n" \ + " addi %0,%0,1\n" \ + " stdcx. 
%0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_val_64(mem) \ + ({ \ + __typeof (*(mem)) __val; \ + __asm __volatile ("1: ldarx %0,0,%2\n" \ + " subi %0,%0,1\n" \ + " stdcx. %0,0,%2\n" \ + " bne- 1b" \ + : "=&b" (__val), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +#define __arch_atomic_decrement_if_positive_64(mem) \ + ({ int __val, __tmp; \ + __asm __volatile ("1: ldarx %0,0,%3\n" \ + " cmpdi 0,%0,0\n" \ + " addi %1,%0,-1\n" \ + " ble 2f\n" \ + " stdcx. %1,0,%3\n" \ + " bne- 1b\n" \ + "2: " __ARCH_ACQ_INSTR \ + : "=&b" (__val), "=&r" (__tmp), "=m" (*mem) \ + : "b" (mem), "m" (*mem) \ + : "cr0", "memory"); \ + __val; \ + }) + +/* + * All powerpc64 processors support the new "light weight" sync (lwsync). + */ +#define atomic_read_barrier() __asm ("lwsync" ::: "memory") +/* + * "light weight" sync can also be used for the release barrier. + */ +#ifndef UP +# define __ARCH_REL_INSTR "lwsync" +#endif +#define atomic_write_barrier() __asm ("lwsync" ::: "memory") + +/* + * Include the rest of the atomic ops macros which are common to both + * powerpc32 and powerpc64. + */ +#include_next <atomic-machine.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c b/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c new file mode 100644 index 0000000000..723948d78f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/backtrace.c @@ -0,0 +1,104 @@ +/* Return backtrace of current program state. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> + +#include <execinfo.h> +#include <libc-vdso.h> + +/* This is the stack layout we see with every stack frame. + Note that every routine is required by the ABI to lay out the stack + like this. + + +----------------+ +-----------------+ + %r1 -> | %r1 last frame--------> | %r1 last frame--->... --> NULL + | | | | + | cr save | | cr save | + | | | | + | (unused) | | return address | + +----------------+ +-----------------+ +*/ +struct layout +{ + struct layout *next; + long int condition_register; + void *return_address; +}; + +/* Since the signal handler is just like any other function it needs to + save/restore its LR and it will save it into callers stack frame. + Since a signal handler doesn't have a caller, the kernel creates a + dummy frame to make it look like it has a caller. */ +struct signal_frame_64 { +#define SIGNAL_FRAMESIZE 128 + char dummy[SIGNAL_FRAMESIZE]; + struct ucontext uc; + /* We don't care about the rest, since the IP value is at 'uc' field. */ +}; + +static inline int +is_sigtramp_address (void *nip) +{ +#ifdef SHARED + if (nip == VDSO_SYMBOL (sigtramp_rt64)) + return 1; +#endif + return 0; +} + +int +__backtrace (void **array, int size) +{ + struct layout *current; + int count; + + /* Force gcc to spill LR. */ + asm volatile ("" : "=l"(current)); + + /* Get the address on top-of-stack. 
*/ + asm volatile ("ld %0,0(1)" : "=r"(current)); + + for ( count = 0; + current != NULL && count < size; + current = current->next, count++) + { + array[count] = current->return_address; + + /* Check if the symbol is the signal trampoline and get the interrupted + * symbol address from the trampoline saved area. */ + if (is_sigtramp_address (current->return_address)) + { + struct signal_frame_64 *sigframe = (struct signal_frame_64*) current; + array[++count] = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_NIP]; + current = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_R1]; + } + } + + /* It's possible the second-last stack frame can't return + (that is, it's __libc_start_main), in which case + the CRT startup code will have set its LR to 'NULL'. */ + if (count > 0 && array[count-1] == NULL) + count--; + + return count; +} +weak_alias (__backtrace, backtrace) +libc_hidden_def (__backtrace) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h b/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h new file mode 100644 index 0000000000..04ca9debf0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bits/wordsize.h @@ -0,0 +1,11 @@ +/* Determine the wordsize from the preprocessor defines. 
*/ + +#if defined __powerpc64__ +# define __WORDSIZE 64 +# define __WORDSIZE_TIME64_COMPAT32 1 +#else +# define __WORDSIZE 32 +# define __WORDSIZE_TIME64_COMPAT32 0 +# define __WORDSIZE32_SIZE_ULONG 0 +# define __WORDSIZE32_PTRDIFF_LONG 0 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S new file mode 100644 index 0000000000..86d49b1c6e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-_setjmp.S @@ -0,0 +1 @@ +/* _setjmp moved to setjmp-common.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S new file mode 100644 index 0000000000..38b734fcb4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bsd-setjmp.S @@ -0,0 +1 @@ +/* setjmp moved to setjmp-common.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S b/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S new file mode 100644 index 0000000000..41cfac5127 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/bzero.S @@ -0,0 +1,20 @@ +/* Optimized bzero `implementation' for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This code was moved into memset.S to solve a double stub call problem. 
+ @local would have worked but it is not supported in PowerPC64 asm. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S new file mode 100644 index 0000000000..1cc66456e3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/cell/memcpy.S @@ -0,0 +1,246 @@ +/* Optimized memcpy implementation for CELL BE PowerPC. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define PREFETCH_AHEAD 6 /* no cache lines SRC prefetching ahead */ +#define ZERO_AHEAD 4 /* no cache lines DST zeroing ahead */ + +/* memcpy routine optimized for CELL-BE-PPC v2.0 + * + * The CELL PPC core has 1 integer unit and 1 load/store unit + * CELL: + * 1st level data cache = 32K + * 2nd level data cache = 512K + * 3rd level data cache = 0K + * With 3.2 GHz clockrate the latency to 2nd level cache is >36 clocks, + * latency to memory is >400 clocks + * To improve copy performance we need to prefetch source data + * far ahead to hide this latency + * For best performance instruction forms ending in "." like "andi." + * should be avoided as the are implemented in microcode on CELL. 
+ * The below code is loop unrolled for the CELL cache line of 128 bytes + */ + +.align 7 + +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + dcbt 0,r4 /* Prefetch ONE SRC cacheline */ + cmpldi cr1,r5,16 /* is size < 16 ? */ + mr r6,r3 + blt+ cr1,.Lshortcopy + +.Lbigcopy: + neg r8,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + clrldi r8,r8,64-4 /* align to 16byte boundary */ + sub r7,r4,r3 + cmpldi cr0,r8,0 + beq+ .Ldst_aligned + +.Ldst_unaligned: + mtcrf 0x01,r8 /* put #bytes to boundary into cr7 */ + subf r5,r8,r5 + + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) + addi r6,r6,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: bf cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + add r4,r7,r6 + +.Ldst_aligned: + + cmpdi cr5,r5,128-1 + + neg r7,r6 + addi r6,r6,-8 /* prepare for stdu */ + addi r4,r4,-8 /* prepare for ldu */ + + clrldi r7,r7,64-7 /* align to cacheline boundary */ + ble+ cr5,.Llessthancacheline + + cmpldi cr6,r7,0 + subf r5,r7,r5 + srdi r7,r7,4 /* divide size by 16 */ + srdi r10,r5,7 /* number of cache lines to copy */ + + cmpldi r10,0 + li r11,0 /* number cachelines to copy with prefetch */ + beq .Lnocacheprefetch + + cmpldi r10,PREFETCH_AHEAD + li r12,128+8 /* prefetch distance */ + ble .Llessthanmaxprefetch + + subi r11,r10,PREFETCH_AHEAD + li r10,PREFETCH_AHEAD + +.Llessthanmaxprefetch: + mtctr r10 + +.LprefetchSRC: + dcbt r12,r4 + addi r12,r12,128 + bdnz .LprefetchSRC + +.Lnocacheprefetch: + mtctr r7 + cmpldi cr1,r5,128 + clrldi r5,r5,64-7 + beq cr6,.Lcachelinealigned + +.Laligntocacheline: + ld r9,0x08(r4) + ldu r7,0x10(r4) + std r9,0x08(r6) + stdu r7,0x10(r6) + bdnz .Laligntocacheline + + +.Lcachelinealigned: /* copy while cache lines */ + + blt- cr1,.Llessthancacheline /* size <128 */ + +.Louterloop: + cmpdi r11,0 + mtctr r11 + beq- .Lendloop + + li r11,128*ZERO_AHEAD +8 /* DCBZ dist 
*/ + +.align 4 + /* Copy whole cachelines, optimized by prefetching SRC cacheline */ +.Lloop: /* Copy aligned body */ + dcbt r12,r4 /* PREFETCH SOURCE some cache lines ahead */ + ld r9, 0x08(r4) + dcbz r11,r6 + ld r7, 0x10(r4) /* 4 register stride copy is optimal */ + ld r8, 0x18(r4) /* to hide 1st level cache latency. */ + ld r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + std r0, 0x20(r6) + ld r9, 0x28(r4) + ld r7, 0x30(r4) + ld r8, 0x38(r4) + ld r0, 0x40(r4) + std r9, 0x28(r6) + std r7, 0x30(r6) + std r8, 0x38(r6) + std r0, 0x40(r6) + ld r9, 0x48(r4) + ld r7, 0x50(r4) + ld r8, 0x58(r4) + ld r0, 0x60(r4) + std r9, 0x48(r6) + std r7, 0x50(r6) + std r8, 0x58(r6) + std r0, 0x60(r6) + ld r9, 0x68(r4) + ld r7, 0x70(r4) + ld r8, 0x78(r4) + ldu r0, 0x80(r4) + std r9, 0x68(r6) + std r7, 0x70(r6) + std r8, 0x78(r6) + stdu r0, 0x80(r6) + + bdnz .Lloop + +.Lendloop: + cmpdi r10,0 + sldi r10,r10,2 /* adjust from 128 to 32 byte stride */ + beq- .Lendloop2 + mtctr r10 + +.Lloop2: /* Copy aligned body */ + ld r9, 0x08(r4) + ld r7, 0x10(r4) + ld r8, 0x18(r4) + ldu r0, 0x20(r4) + std r9, 0x08(r6) + std r7, 0x10(r6) + std r8, 0x18(r6) + stdu r0, 0x20(r6) + + bdnz .Lloop2 +.Lendloop2: + +.Llessthancacheline: /* less than cache to do ? */ + cmpldi cr0,r5,16 + srdi r7,r5,4 /* divide size by 16 */ + blt- .Ldo_lt16 + mtctr r7 + +.Lcopy_remaining: + ld r8,0x08(r4) + ldu r7,0x10(r4) + std r8,0x08(r6) + stdu r7,0x10(r6) + bdnz .Lcopy_remaining + +.Ldo_lt16: /* less than 16 ? 
*/ + cmpldi cr0,r5,0 /* copy remaining bytes (0-15) */ + beqlr+ /* no rest to copy */ + addi r4,r4,8 + addi r6,r6,8 + +.Lshortcopy: /* SIMPLE COPY to handle size =< 15 bytes */ + mtcrf 0x01,r5 + sub r7,r4,r6 + bf- cr7*4+0,8f + ldx r0,r7,r6 /* copy 8 byte */ + std r0,0(r6) + addi r6,r6,8 +8: + bf cr7*4+1,4f + lwzx r0,r7,r6 /* copy 4 byte */ + stw r0,0(r6) + addi r6,r6,4 +4: + bf cr7*4+2,2f + lhzx r0,r7,r6 /* copy 2 byte */ + sth r0,0(r6) + addi r6,r6,2 +2: + bf cr7*4+3,1f + lbzx r0,r7,r6 /* copy 1 byte */ + stb r0,0(r6) +1: blr + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/configure b/REORG.TODO/sysdeps/powerpc/powerpc64/configure new file mode 100644 index 0000000000..7632a7be04 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/configure @@ -0,0 +1,33 @@ +# This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/powerpc/powerpc64. + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for support for overlapping .opd entries" >&5 +$as_echo_n "checking for support for overlapping .opd entries... " >&6; } +if ${libc_cv_overlapping_opd+:} false; then : + $as_echo_n "(cached) " >&6 +else + libc_cv_overlapping_opd=no +echo 'void foo (void) {}' > conftest.c +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then + if grep '\.TOC\.@tocbase' conftest.s > /dev/null; then + if grep '\.TOC\.@tocbase[ ]*,[ ]*0' conftest.s > /dev/null; then + : + else + libc_cv_overlapping_opd=yes + fi + fi +fi +rm -f conftest.c conftest.s + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_overlapping_opd" >&5 +$as_echo "$libc_cv_overlapping_opd" >&6; } +if test x$libc_cv_overlapping_opd = xyes; then + $as_echo "#define USE_PPC64_OVERLAPPING_OPD 1" >>confdefs.h + +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac b/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac new file mode 100644 index 0000000000..f309ba35a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/configure.ac @@ -0,0 +1,23 @@ +GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. +# Local configure fragment for sysdeps/powerpc/powerpc64. + +AC_CACHE_CHECK(for support for overlapping .opd entries, +libc_cv_overlapping_opd, [dnl +libc_cv_overlapping_opd=no +echo 'void foo (void) {}' > conftest.c +if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS -S conftest.c -o conftest.s 1>&AS_MESSAGE_LOG_FD); then +changequote(,)dnl + if grep '\.TOC\.@tocbase' conftest.s > /dev/null; then + if grep '\.TOC\.@tocbase[ ]*,[ ]*0' conftest.s > /dev/null; then + : + else + libc_cv_overlapping_opd=yes + fi + fi +changequote([,])dnl +fi +rm -f conftest.c conftest.s +]) +if test x$libc_cv_overlapping_opd = xyes; then + AC_DEFINE(USE_PPC64_OVERLAPPING_OPD) +fi diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S b/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S new file mode 100644 index 0000000000..fa4f5833ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/crti.S @@ -0,0 +1,88 @@ +/* Special .init and .fini section support for PowerPC64. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crti.S puts a function prologue at the beginning of the .init and + .fini sections and defines global symbols for those addresses, so + they can be called as functions. The symbols _init and _fini are + magic and cause the linker to emit DT_INIT and DT_FINI. 
*/ + +#include <libc-symbols.h> +#include <sysdep.h> + +#ifndef PREINIT_FUNCTION +# define PREINIT_FUNCTION __gmon_start__ +#endif + +#ifndef PREINIT_FUNCTION_WEAK +# define PREINIT_FUNCTION_WEAK 1 +#endif + +#if PREINIT_FUNCTION_WEAK + weak_extern (PREINIT_FUNCTION) +#else + .hidden PREINIT_FUNCTION +#endif + +#if PREINIT_FUNCTION_WEAK + .section ".toc", "aw" +.LC0: + .tc PREINIT_FUNCTION[TC], PREINIT_FUNCTION +#endif + .section ".init", "ax", @progbits + ENTRY_2(_init) + .align ALIGNARG (2) +BODY_LABEL (_init): + LOCALENTRY(_init) + mflr 0 + std 0, FRAME_LR_SAVE(r1) + stdu r1, -FRAME_MIN_SIZE_PARM(r1) +#if PREINIT_FUNCTION_WEAK + addis r9, r2, .LC0@toc@ha + ld r0, .LC0@toc@l(r9) + cmpdi cr7, r0, 0 + beq+ cr7, 1f +#endif + bl JUMPTARGET (PREINIT_FUNCTION) + nop +1: + + .section ".fini", "ax", @progbits + ENTRY_2(_fini) + .align ALIGNARG (2) +BODY_LABEL (_fini): + LOCALENTRY(_fini) + mflr 0 + std 0, FRAME_LR_SAVE(r1) + stdu r1, -FRAME_MIN_SIZE_PARM(r1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S new file mode 100644 index 0000000000..a8906aa16c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/crtn.S @@ -0,0 +1,51 @@ +/* Special .init and .fini section support for PowerPC64. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. 
(The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* crtn.S puts function epilogues in the .init and .fini sections + corresponding to the prologues in crti.S. */ + +#include <sysdep.h> + + .section .init,"ax",@progbits + addi r1, r1, FRAME_MIN_SIZE_PARM + ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr + + .section .fini,"ax",@progbits + addi r1, r1, FRAME_MIN_SIZE_PARM + ld r0, FRAME_LR_SAVE(r1) + mtlr r0 + blr diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h new file mode 100644 index 0000000000..142714b421 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-dtprocnum.h @@ -0,0 +1,21 @@ +/* Configuration of lookup functions. PowerPC64 version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Number of extra dynamic section entries for this architecture. By + default there are none. */ +#define DT_THISPROCNUM DT_PPC64_NUM diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h new file mode 100644 index 0000000000..d8f5988bc9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-irel.h @@ -0,0 +1,63 @@ +/* Machine-dependent ELF indirect relocation inline functions. + PowerPC64 version. + Copyright (C) 2009-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _DL_IREL_H +#define _DL_IREL_H + +#include <stdio.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <dl-machine.h> + +#define ELF_MACHINE_IRELA 1 + +static inline Elf64_Addr +__attribute ((always_inline)) +elf_ifunc_invoke (Elf64_Addr addr) +{ + return ((Elf64_Addr (*) (unsigned long int)) (addr)) (GLRO(dl_hwcap)); +} + +static inline void +__attribute ((always_inline)) +elf_irela (const Elf64_Rela *reloc) +{ + unsigned int r_type = ELF64_R_TYPE (reloc->r_info); + + if (__glibc_likely (r_type == R_PPC64_IRELATIVE)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); + *reloc_addr = value; + } + else if (__glibc_likely (r_type == R_PPC64_JMP_IREL)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); +#if _CALL_ELF != 2 + *(Elf64_FuncDesc *) reloc_addr = *(Elf64_FuncDesc *) value; +#else + *reloc_addr = value; +#endif + } + else + __libc_fatal ("unexpected reloc type in static binary"); +} + +#endif /* dl-irel.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c new file mode 100644 index 0000000000..0eccc6621a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.c @@ -0,0 +1,47 @@ +/* Machine-dependent ELF dynamic relocation functions. PowerPC64 version. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <unistd.h> +#include <ldsodefs.h> +#include <_itoa.h> +#include <dl-machine.h> + +void +_dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym) +{ + char buffer[128]; + char *t; + t = stpcpy (buffer, name); + t = stpcpy (t, " reloc at 0x"); + _itoa_word ((unsigned long) reloc_addr, t, 16, 0); + if (refsym) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + t = stpcpy (t, " for symbol `"); + t = stpcpy (t, strtab + refsym->st_name); + t = stpcpy (t, "'"); + } + t = stpcpy (t, " out of range"); + _dl_signal_error (0, map->l_name, NULL, buffer); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h new file mode 100644 index 0000000000..6391b3a558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-machine.h @@ -0,0 +1,1036 @@ +/* Machine-dependent ELF dynamic relocation inline functions. + PowerPC64 version. + Copyright 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. 
+ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "powerpc64" + +#include <assert.h> +#include <sys/param.h> +#include <dl-tls.h> +#include <sysdep.h> +#include <hwcapinfo.h> + +/* Translate a processor specific dynamic tag to the index + in l_info array. */ +#define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM) + +#if _CALL_ELF != 2 +/* A PowerPC64 function descriptor. The .plt (procedure linkage + table) and .opd (official procedure descriptor) sections are + arrays of these. */ +typedef struct +{ + Elf64_Addr fd_func; + Elf64_Addr fd_toc; + Elf64_Addr fd_aux; +} Elf64_FuncDesc; +#endif + +#define ELF_MULT_MACHINES_SUPPORTED + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf64_Ehdr *ehdr) +{ + /* Verify that the binary matches our ABI version. */ + if ((ehdr->e_flags & EF_PPC64_ABI) != 0) + { +#if _CALL_ELF != 2 + if ((ehdr->e_flags & EF_PPC64_ABI) != 1) + return 0; +#else + if ((ehdr->e_flags & EF_PPC64_ABI) != 2) + return 0; +#endif + } + + return ehdr->e_machine == EM_PPC64; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_machine (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_PPC; +} + +/* Return nonzero iff ELF header is compatible with the running host, + but not this loader. */ +static inline int +elf_host_tolerates_class (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_ident[EI_CLASS] == ELFCLASS32; +} + + +/* Return the run-time load address of the shared object, assuming it + was originally linked at zero. 
*/ +static inline Elf64_Addr +elf_machine_load_address (void) __attribute__ ((const)); + +static inline Elf64_Addr +elf_machine_load_address (void) +{ + Elf64_Addr ret; + + /* The first entry in .got (and thus the first entry in .toc) is the + link-time TOC_base, ie. r2. So the difference between that and + the current r2 set by the kernel is how far the shared lib has + moved. */ + asm ( " ld %0,-32768(2)\n" + " subf %0,%0,2\n" + : "=r" (ret)); + return ret; +} + +/* Return the link-time address of _DYNAMIC. */ +static inline Elf64_Addr +elf_machine_dynamic (void) +{ + Elf64_Addr runtime_dynamic; + /* It's easier to get the run-time address. */ + asm ( " addis %0,2,_DYNAMIC@toc@ha\n" + " addi %0,%0,_DYNAMIC@toc@l\n" + : "=b" (runtime_dynamic)); + /* Then subtract off the load address offset. */ + return runtime_dynamic - elf_machine_load_address() ; +} + +#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ + +/* The PLT uses Elf64_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + + +#ifdef HAVE_INLINED_SYSCALLS +/* We do not need _dl_starting_up. */ +# define DL_STARTING_UP_DEF +#else +# define DL_STARTING_UP_DEF \ +".LC__dl_starting_up:\n" \ +" .tc __GI__dl_starting_up[TC],__GI__dl_starting_up\n" +#endif + + +/* Initial entry point code for the dynamic linker. The C function + `_dl_start' is the real entry point; its return value is the user + program's entry point. */ +#define RTLD_START \ + asm (".pushsection \".text\"\n" \ +" .align 2\n" \ +" " ENTRY_2(_start) "\n" \ +BODY_PREFIX "_start:\n" \ +" " LOCALENTRY(_start) "\n" \ +/* We start with the following on the stack, from top: \ + argc (4 bytes); \ + arguments for program (terminated by NULL); \ + environment variables (terminated by NULL); \ + arguments for the program loader. */ \ +" mr 3,1\n" \ +" li 4,0\n" \ +" stdu 4,-128(1)\n" \ +/* Call _dl_start with one parameter pointing at argc. */ \ +" bl " DOT_PREFIX "_dl_start\n" \ +" nop\n" \ +/* Transfer control to _dl_start_user! 
*/ \ +" b " DOT_PREFIX "_dl_start_user\n" \ +".LT__start:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__start-" BODY_PREFIX "_start\n" \ +" .short .LT__start_name_end-.LT__start_name_start\n" \ +".LT__start_name_start:\n" \ +" .ascii \"_start\"\n" \ +".LT__start_name_end:\n" \ +" .align 2\n" \ +" " END_2(_start) "\n" \ +" .pushsection \".toc\",\"aw\"\n" \ +DL_STARTING_UP_DEF \ +".LC__rtld_local:\n" \ +" .tc _rtld_local[TC],_rtld_local\n" \ +".LC__dl_argc:\n" \ +" .tc _dl_argc[TC],_dl_argc\n" \ +".LC__dl_argv:\n" \ +" .tc __GI__dl_argv[TC],__GI__dl_argv\n" \ +".LC__dl_fini:\n" \ +" .tc _dl_fini[TC],_dl_fini\n" \ +" .popsection\n" \ +" " ENTRY_2(_dl_start_user) "\n" \ +/* Now, we do our main work of calling initialisation procedures. \ + The ELF ABI doesn't say anything about parameters for these, \ + so we just pass argc, argv, and the environment. \ + Changing these is strongly discouraged (not least because argc is \ + passed by value!). */ \ +BODY_PREFIX "_dl_start_user:\n" \ +" " LOCALENTRY(_dl_start_user) "\n" \ +/* the address of _start in r30. */ \ +" mr 30,3\n" \ +/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */ \ +" ld 28,.LC__rtld_local@toc(2)\n" \ +" ld 29,.LC__dl_argc@toc(2)\n" \ +" ld 27,.LC__dl_argv@toc(2)\n" \ +/* _dl_init (_dl_loaded, _dl_argc, _dl_argv, _dl_argv+_dl_argc+1). */ \ +" ld 3,0(28)\n" \ +" lwa 4,0(29)\n" \ +" ld 5,0(27)\n" \ +" sldi 6,4,3\n" \ +" add 6,5,6\n" \ +" addi 6,6,8\n" \ +" bl " DOT_PREFIX "_dl_init\n" \ +" nop\n" \ +/* Now, to conform to the ELF ABI, we have to: \ + Pass argc (actually _dl_argc) in r3; */ \ +" lwa 3,0(29)\n" \ +/* Pass argv (actually _dl_argv) in r4; */ \ +" ld 4,0(27)\n" \ +/* Pass argv+argc+1 in r5; */ \ +" sldi 5,3,3\n" \ +" add 6,4,5\n" \ +" addi 5,6,8\n" \ +/* Pass the auxiliary vector in r6. This is passed to us just after \ + _envp. 
*/ \ +"2: ldu 0,8(6)\n" \ +" cmpdi 0,0\n" \ +" bne 2b\n" \ +" addi 6,6,8\n" \ +/* Pass a termination function pointer (in this case _dl_fini) in \ + r7. */ \ +" ld 7,.LC__dl_fini@toc(2)\n" \ +/* Pass the stack pointer in r1 (so far so good), pointing to a NULL \ + value. This lets our startup code distinguish between a program \ + linked statically, which linux will call with argc on top of the \ + stack which will hopefully never be zero, and a dynamically linked \ + program which will always have a NULL on the top of the stack. \ + Take the opportunity to clear LR, so anyone who accidentally \ + returns from _start gets SEGV. Also clear the next few words of \ + the stack. */ \ +" li 31,0\n" \ +" std 31,0(1)\n" \ +" mtlr 31\n" \ +" std 31,8(1)\n" \ +" std 31,16(1)\n" \ +" std 31,24(1)\n" \ +/* Now, call the start function descriptor at r30... */ \ +" .globl ._dl_main_dispatch\n" \ +"._dl_main_dispatch:\n" \ +" " PPC64_LOAD_FUNCPTR(30) "\n" \ +" bctr\n" \ +".LT__dl_start_user:\n" \ +" .long 0\n" \ +" .byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n" \ +" .long .LT__dl_start_user-" BODY_PREFIX "_dl_start_user\n" \ +" .short .LT__dl_start_user_name_end-.LT__dl_start_user_name_start\n" \ +".LT__dl_start_user_name_start:\n" \ +" .ascii \"_dl_start_user\"\n" \ +".LT__dl_start_user_name_end:\n" \ +" .align 2\n" \ +" " END_2(_dl_start_user) "\n" \ +" .popsection"); + +/* ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to + one of the main executable's symbols, as for a COPY reloc. + + To make function pointer comparisons work on most targets, the + relevant ABI states that the address of a non-local function in a + dynamically linked executable is the address of the PLT entry for + that function. This is quite reasonable since using the real + function address in a non-PIC executable would typically require + dynamic relocations in .text, something to be avoided. 
For such + functions, the linker emits a SHN_UNDEF symbol in the executable + with value equal to the PLT entry address. Normally, SHN_UNDEF + symbols have a value of zero, so this is a clue to ld.so that it + should treat these symbols specially. For relocations not in + ELF_RTYPE_CLASS_PLT (eg. those on function pointers), ld.so should + use the value of the executable SHN_UNDEF symbol, ie. the PLT entry + address. For relocations in ELF_RTYPE_CLASS_PLT (eg. the relocs in + the PLT itself), ld.so should use the value of the corresponding + defined symbol in the object that defines the function, ie. the + real function address. This complicates ld.so in that there are + now two possible values for a given symbol, and it gets even worse + because protected symbols need yet another set of rules. + + On PowerPC64 we don't need any of this. The linker won't emit + SHN_UNDEF symbols with non-zero values. ld.so can make all + relocations behave "normally", ie. always use the real address + like PLT relocations. So always set ELF_RTYPE_CLASS_PLT. */ + +#if _CALL_ELF != 2 +#define elf_machine_type_class(type) \ + (ELF_RTYPE_CLASS_PLT | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) +#else +/* And now that you have read that large comment, you can disregard it + all for ELFv2. ELFv2 does need the special SHN_UNDEF treatment. */ +#define IS_PPC64_TLS_RELOC(R) \ + (((R) >= R_PPC64_TLS && (R) <= R_PPC64_DTPREL16_HIGHESTA) \ + || ((R) >= R_PPC64_TPREL16_HIGH && (R) <= R_PPC64_DTPREL16_HIGHA)) + +#define elf_machine_type_class(type) \ + ((((type) == R_PPC64_JMP_SLOT \ + || (type) == R_PPC64_ADDR24 \ + || IS_PPC64_TLS_RELOC (type)) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_PPC64_COPY) * ELF_RTYPE_CLASS_COPY)) +#endif + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT + +/* The PowerPC never uses REL relocations. 
*/ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +#ifdef SHARED +# define DL_PLATFORM_INIT dl_platform_init () + +static inline void __attribute__ ((unused)) +dl_platform_init (void) +{ + __tcb_parse_hwcap_and_convert_at_platform (); +} +#endif + +/* Stuff for the PLT. */ +#if _CALL_ELF != 2 +#define PLT_INITIAL_ENTRY_WORDS 3 +#define PLT_ENTRY_WORDS 3 +#define GLINK_INITIAL_ENTRY_WORDS 8 +/* The first 32k entries of glink can set an index and branch using two + instructions; past that point, glink uses three instructions. */ +#define GLINK_ENTRY_WORDS(I) (((I) < 0x8000)? 2 : 3) +#else +#define PLT_INITIAL_ENTRY_WORDS 2 +#define PLT_ENTRY_WORDS 1 +#define GLINK_INITIAL_ENTRY_WORDS 8 +#define GLINK_ENTRY_WORDS(I) 1 +#endif + +#define PPC_DCBST(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") +#define PPC_DCBT(where) asm volatile ("dcbt 0,%0" : : "r"(where) : "memory") +#define PPC_DCBF(where) asm volatile ("dcbf 0,%0" : : "r"(where) : "memory") +#define PPC_SYNC asm volatile ("sync" : : : "memory") +#define PPC_ISYNC asm volatile ("sync; isync" : : : "memory") +#define PPC_ICBI(where) asm volatile ("icbi 0,%0" : : "r"(where) : "memory") +#define PPC_DIE asm volatile ("tweq 0,0") +/* Use this when you've modified some code, but it won't be in the + instruction fetch queue (or when it doesn't matter if it is). */ +#define MODIFIED_CODE_NOQUEUE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); } while (0) +/* Use this when it might be in the instruction queue. */ +#define MODIFIED_CODE(where) \ + do { PPC_DCBST(where); PPC_SYNC; PPC_ICBI(where); PPC_ISYNC; } while (0) + +/* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. 
*/ +static inline int __attribute__ ((always_inline)) +elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) +{ + if (map->l_info[DT_JMPREL]) + { + Elf64_Word i; + Elf64_Word *glink = NULL; + Elf64_Xword *plt = (Elf64_Xword *) D_PTR (map, l_info[DT_PLTGOT]); + Elf64_Word num_plt_entries = (map->l_info[DT_PLTRELSZ]->d_un.d_val + / sizeof (Elf64_Rela)); + Elf64_Addr l_addr = map->l_addr; + Elf64_Dyn **info = map->l_info; + char *p; + + extern void _dl_runtime_resolve (void); + extern void _dl_profile_resolve (void); + + /* Relocate the DT_PPC64_GLINK entry in the _DYNAMIC section. + elf_get_dynamic_info takes care of the standard entries but + doesn't know exactly what to do with processor specific + entries. */ + if (info[DT_PPC64(GLINK)] != NULL) + info[DT_PPC64(GLINK)]->d_un.d_ptr += l_addr; + + if (lazy) + { + Elf64_Word glink_offset; + Elf64_Word offset; + Elf64_Addr dlrr; + + dlrr = (Elf64_Addr) (profile ? _dl_profile_resolve + : _dl_runtime_resolve); + if (profile && GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), map)) + /* This is the object we are looking for. Say that we really + want profiling and the timers are started. */ + GL(dl_profile_map) = map; + +#if _CALL_ELF != 2 + /* We need to stuff the address/TOC of _dl_runtime_resolve + into doublewords 0 and 1 of plt_reserve. Then we need to + stuff the map address into doubleword 2 of plt_reserve. + This allows the GLINK0 code to transfer control to the + correct trampoline which will transfer control to fixup + in dl-machine.c. */ + { + /* The plt_reserve area is the 1st 3 doublewords of the PLT. */ + Elf64_FuncDesc *plt_reserve = (Elf64_FuncDesc *) plt; + Elf64_FuncDesc *resolve_fd = (Elf64_FuncDesc *) dlrr; + plt_reserve->fd_func = resolve_fd->fd_func; + plt_reserve->fd_toc = resolve_fd->fd_toc; + plt_reserve->fd_aux = (Elf64_Addr) map; +#ifdef RTLD_BOOTSTRAP + /* When we're bootstrapping, the opd entry will not have + been relocated yet. 
*/ + plt_reserve->fd_func += l_addr; + plt_reserve->fd_toc += l_addr; +#endif + } +#else + /* When we don't have function descriptors, the first doubleword + of the PLT holds the address of _dl_runtime_resolve, and the + second doubleword holds the map address. */ + plt[0] = dlrr; + plt[1] = (Elf64_Addr) map; +#endif + + /* Set up the lazy PLT entries. */ + glink = (Elf64_Word *) D_PTR (map, l_info[DT_PPC64(GLINK)]); + offset = PLT_INITIAL_ENTRY_WORDS; + glink_offset = GLINK_INITIAL_ENTRY_WORDS; + for (i = 0; i < num_plt_entries; i++) + { + + plt[offset] = (Elf64_Xword) &glink[glink_offset]; + offset += PLT_ENTRY_WORDS; + glink_offset += GLINK_ENTRY_WORDS (i); + } + + /* Now, we've modified data. We need to write the changes from + the data cache to a second-level unified cache, then make + sure that stale data in the instruction cache is removed. + (In a multiprocessor system, the effect is more complex.) + Most of the PLT shouldn't be in the instruction cache, but + there may be a little overlap at the start and the end. + + Assumes that dcbst and icbi apply to lines of 16 bytes or + more. Current known line sizes are 16, 32, and 128 bytes. */ + + for (p = (char *) plt; p < (char *) &plt[offset]; p += 16) + PPC_DCBST (p); + PPC_SYNC; + } + } + return lazy; +} + +#if _CALL_ELF == 2 +/* If the PLT entry whose reloc is 'reloc' resolves to a function in + the same object, return the target function's local entry point + offset if usable. */ +static inline Elf64_Addr __attribute__ ((always_inline)) +ppc64_local_entry_offset (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc) +{ + const Elf64_Sym *symtab; + const Elf64_Sym *sym; + + /* If the target function is in a different object, we cannot + use the local entry point. */ + if (sym_map != map) + return 0; + + /* If the linker inserted multiple TOCs, we cannot use the + local entry point. 
*/ + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_MULTI_TOC)) + return 0; + + /* Otherwise, we can use the local entry point. Retrieve its offset + from the symbol's ELF st_other field. */ + symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]); + sym = &symtab[ELFW(R_SYM) (reloc->r_info)]; + + /* If the target function is an ifunc then the local entry offset is + for the resolver, not the final destination. */ + if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)) + return 0; + + return PPC64_LOCAL_ENTRY_OFFSET (sym->st_other); +} +#endif + +/* Change the PLT entry whose reloc is 'reloc' to call the actual + routine. */ +static inline Elf64_Addr __attribute__ ((always_inline)) +elf_machine_fixup_plt (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) +{ +#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_Addr offset = 0; + Elf64_FuncDesc zero_fd = {0, 0, 0}; + + PPC_DCBT (&plt->fd_aux); + PPC_DCBT (&plt->fd_func); + + /* If sym_map is NULL, it's a weak undefined sym; Set the plt to + zero. finaladdr should be zero already in this case, but guard + against invalid plt relocations with non-zero addends. */ + if (sym_map == NULL) + finaladdr = 0; + + /* Don't die here if finaladdr is zero, die if this plt entry is + actually called. Makes a difference when LD_BIND_NOW=1. + finaladdr may be zero for a weak undefined symbol, or when an + ifunc resolver returns zero. */ + if (finaladdr == 0) + rel = &zero_fd; + else + { + PPC_DCBT (&rel->fd_aux); + PPC_DCBT (&rel->fd_func); + } + + /* If the opd entry is not yet relocated (because it's from a shared + object that hasn't been processed yet), then manually reloc it. 
*/ + if (finaladdr != 0 && map != sym_map && !sym_map->l_relocated +#if !defined RTLD_BOOTSTRAP && defined SHARED + /* Bootstrap map doesn't have l_relocated set for it. */ + && sym_map != &GL(dl_rtld_map) +#endif + ) + offset = sym_map->l_addr; + + /* For PPC64, fixup_plt copies the function descriptor from opd + over the corresponding PLT entry. + Initially, PLT Entry[i] is set up for lazy linking, or is zero. + For lazy linking, the fd_toc and fd_aux entries are irrelevant, + so for thread safety we write them before changing fd_func. */ + + plt->fd_aux = rel->fd_aux + offset; + plt->fd_toc = rel->fd_toc + offset; + PPC_DCBF (&plt->fd_toc); + PPC_ISYNC; + + plt->fd_func = rel->fd_func + offset; + PPC_DCBST (&plt->fd_func); + PPC_ISYNC; +#else + finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; +#endif + + return finaladdr; +} + +static inline void __attribute__ ((always_inline)) +elf_machine_plt_conflict (struct link_map *map, lookup_t sym_map, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr finaladdr) +{ +#if _CALL_ELF != 2 + Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr; + Elf64_FuncDesc *rel = (Elf64_FuncDesc *) finaladdr; + Elf64_FuncDesc zero_fd = {0, 0, 0}; + + if (sym_map == NULL) + finaladdr = 0; + + if (finaladdr == 0) + rel = &zero_fd; + + plt->fd_func = rel->fd_func; + plt->fd_aux = rel->fd_aux; + plt->fd_toc = rel->fd_toc; + PPC_DCBST (&plt->fd_func); + PPC_DCBST (&plt->fd_aux); + PPC_DCBST (&plt->fd_toc); + PPC_SYNC; +#else + finaladdr += ppc64_local_entry_offset (map, sym_map, reloc); + *reloc_addr = finaladdr; +#endif +} + +/* Return the final value of a plt relocation. */ +static inline Elf64_Addr +elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr value) +{ + return value + reloc->r_addend; +} + + +/* Names of the architecture-specific auditing callback functions. 
*/ +#if _CALL_ELF != 2 +#define ARCH_LA_PLTENTER ppc64_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc64_gnu_pltexit +#else +#define ARCH_LA_PLTENTER ppc64v2_gnu_pltenter +#define ARCH_LA_PLTEXIT ppc64v2_gnu_pltexit +#endif + +#endif /* dl_machine_h */ + +#ifdef RESOLVE_MAP + +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI ((v) + 0x8000) +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define PPC_HIGHERA(v) PPC_HIGHER ((v) + 0x8000) +#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff) +#define PPC_HIGHESTA(v) PPC_HIGHEST ((v) + 0x8000) +#define BIT_INSERT(var, val, mask) \ + ((var) = ((var) & ~(Elf64_Addr) (mask)) | ((val) & (mask))) + +#define dont_expect(X) __builtin_expect ((X), 0) + +extern void _dl_reloc_overflow (struct link_map *map, + const char *name, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym) + attribute_hidden; + +auto inline void __attribute__ ((always_inline)) +elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +/* This computes the value used by TPREL* relocs. */ +auto inline Elf64_Addr __attribute__ ((always_inline, const)) +elf_machine_tprel (struct link_map *map, + struct link_map *sym_map, + const Elf64_Sym *sym, + const Elf64_Rela *reloc) +{ +#ifndef RTLD_BOOTSTRAP + if (sym_map) + { + CHECK_STATIC_TLS (map, sym_map); +#endif + return TLS_TPREL_VALUE (sym_map, sym, reloc); +#ifndef RTLD_BOOTSTRAP + } +#endif + return 0; +} + +/* Call function at address VALUE (an OPD entry) to resolve ifunc relocs. */ +auto inline Elf64_Addr __attribute__ ((always_inline)) +resolve_ifunc (Elf64_Addr value, + const struct link_map *map, const struct link_map *sym_map) +{ +#if _CALL_ELF != 2 +#ifndef RESOLVE_CONFLICT_FIND_MAP + /* The function we are calling may not yet have its opd entry relocated. 
*/ + Elf64_FuncDesc opd; + if (map != sym_map +# if !defined RTLD_BOOTSTRAP && defined SHARED + /* Bootstrap map doesn't have l_relocated set for it. */ + && sym_map != &GL(dl_rtld_map) +# endif + && !sym_map->l_relocated) + { + Elf64_FuncDesc *func = (Elf64_FuncDesc *) value; + opd.fd_func = func->fd_func + sym_map->l_addr; + opd.fd_toc = func->fd_toc + sym_map->l_addr; + opd.fd_aux = func->fd_aux; + /* GCC 4.9+ eliminates the branch as dead code, force the odp set + dependency. */ + asm ("" : "=r" (value) : "0" (&opd), "X" (opd)); + } +#endif +#endif + return ((Elf64_Addr (*) (unsigned long int)) value) (GLRO(dl_hwcap)); +} + +/* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. */ +auto inline void __attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, + void *const reloc_addr_arg, + int skip_ifunc) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + const int r_type = ELF64_R_TYPE (reloc->r_info); + const Elf64_Sym *const refsym = sym; + union unaligned + { + uint16_t u2; + uint32_t u4; + uint64_t u8; + } __attribute__ ((__packed__)); + + if (r_type == R_PPC64_RELATIVE) + { + *reloc_addr = map->l_addr + reloc->r_addend; + return; + } + + if (__glibc_unlikely (r_type == R_PPC64_NONE)) + return; + + /* We need SYM_MAP even in the absence of TLS, for elf_machine_fixup_plt + and STT_GNU_IFUNC. */ + struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); + Elf64_Addr value = ((sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value) + + reloc->r_addend); + + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) + value = resolve_ifunc (value, map, sym_map); + + /* For relocs that don't edit code, return. 
+ For relocs that might edit instructions, break from the switch. */ + switch (r_type) + { + case R_PPC64_ADDR64: + case R_PPC64_GLOB_DAT: + *reloc_addr = value; + return; + + case R_PPC64_IRELATIVE: + if (__glibc_likely (!skip_ifunc)) + value = resolve_ifunc (value, map, sym_map); + *reloc_addr = value; + return; + + case R_PPC64_JMP_IREL: + if (__glibc_likely (!skip_ifunc)) + value = resolve_ifunc (value, map, sym_map); + /* Fall thru */ + case R_PPC64_JMP_SLOT: +#ifdef RESOLVE_CONFLICT_FIND_MAP + elf_machine_plt_conflict (map, sym_map, reloc, reloc_addr, value); +#else + elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value); +#endif + return; + + case R_PPC64_DTPMOD64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + reloc_addr[0] = 0; + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; +#else + if (sym_map != NULL) + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (TRY_STATIC_TLS (map, sym_map)) +# endif + { + reloc_addr[0] = 0; + /* Set up for local dynamic. */ + reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET + + TLS_DTV_OFFSET); + return; + } + } +#endif + } +#ifdef RTLD_BOOTSTRAP + /* During startup the dynamic linker is always index 1. */ + *reloc_addr = 1; +#else + /* Get the information from the link map returned by the + resolve function. */ + if (sym_map != NULL) + *reloc_addr = sym_map->l_tls_modid; +#endif + return; + + case R_PPC64_DTPREL64: + if (map->l_info[DT_PPC64(OPT)] + && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_TLS)) + { +#ifdef RTLD_BOOTSTRAP + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; +#else + if (sym_map != NULL) + { + /* This reloc is always preceded by R_PPC64_DTPMOD64. 
*/ +# ifndef SHARED + assert (HAVE_STATIC_TLS (map, sym_map)); +# else + if (HAVE_STATIC_TLS (map, sym_map)) +# endif + { + *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc); + return; + } + } +#endif + } + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. */ +#ifndef RTLD_BOOTSTRAP + if (sym_map != NULL) + *reloc_addr = TLS_DTPREL_VALUE (sym, reloc); +#endif + return; + + case R_PPC64_TPREL64: + *reloc_addr = elf_machine_tprel (map, sym_map, sym, reloc); + return; + + case R_PPC64_TPREL16_LO_DS: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_LO_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_TPREL16_DS: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_TPREL16: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_TPREL16_LO: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_TPREL16_HI: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect (value + 0x80000000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_HI", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_TPREL16_HIGH: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_TPREL16_HA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + if (dont_expect (value + 
0x80008000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_TPREL16_HA", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_TPREL16_HIGHA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_TPREL16_HIGHER: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHER (value); + break; + + case R_PPC64_TPREL16_HIGHEST: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHEST (value); + break; + + case R_PPC64_TPREL16_HIGHERA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value); + break; + + case R_PPC64_TPREL16_HIGHESTA: + value = elf_machine_tprel (map, sym_map, sym, reloc); + *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value); + break; + +#ifndef RTLD_BOOTSTRAP /* None of the following appear in ld.so */ + case R_PPC64_ADDR16_LO_DS: + if (dont_expect ((value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_LO_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_ADDR16_LO: + *(Elf64_Half *) reloc_addr = PPC_LO (value); + break; + + case R_PPC64_ADDR16_HI: + if (dont_expect (value + 0x80000000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_HI", reloc_addr, refsym); + case R_PPC64_ADDR16_HIGH: + *(Elf64_Half *) reloc_addr = PPC_HI (value); + break; + + case R_PPC64_ADDR16_HA: + if (dont_expect (value + 0x80008000 >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_HA", reloc_addr, refsym); + case R_PPC64_ADDR16_HIGHA: + *(Elf64_Half *) reloc_addr = PPC_HA (value); + break; + + case R_PPC64_ADDR30: + { + Elf64_Addr delta = value - (Elf64_Xword) reloc_addr; + if (dont_expect ((delta + 0x80000000) >= 0x100000000LL + || (delta & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR30", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Word 
*) reloc_addr, delta, 0xfffffffc); + } + break; + + case R_PPC64_COPY: + if (dont_expect (sym == NULL)) + /* This can happen in trace mode when an object could not be found. */ + return; + if (dont_expect (sym->st_size > refsym->st_size + || (GLRO(dl_verbose) + && sym->st_size < refsym->st_size))) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("%s: Symbol `%s' has different size" \ + " in shared object," \ + " consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr_arg, (char *) value, + MIN (sym->st_size, refsym->st_size)); + return; + + case R_PPC64_UADDR64: + ((union unaligned *) reloc_addr)->u8 = value; + return; + + case R_PPC64_UADDR32: + ((union unaligned *) reloc_addr)->u4 = value; + return; + + case R_PPC64_ADDR32: + if (dont_expect ((value + 0x80000000) >= 0x100000000LL)) + _dl_reloc_overflow (map, "R_PPC64_ADDR32", reloc_addr, refsym); + *(Elf64_Word *) reloc_addr = value; + return; + + case R_PPC64_ADDR24: + if (dont_expect ((value + 0x2000000) >= 0x4000000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR24", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Word *) reloc_addr, value, 0x3fffffc); + break; + + case R_PPC64_ADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16", reloc_addr, refsym); + *(Elf64_Half *) reloc_addr = value; + break; + + case R_PPC64_UADDR16: + if (dont_expect ((value + 0x8000) >= 0x10000)) + _dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, refsym); + ((union unaligned *) reloc_addr)->u2 = value; + return; + + case R_PPC64_ADDR16_DS: + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR16_DS", reloc_addr, refsym); + BIT_INSERT (*(Elf64_Half *) reloc_addr, value, 0xfffc); + break; + + case R_PPC64_ADDR16_HIGHER: + *(Elf64_Half *) reloc_addr = PPC_HIGHER (value); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(Elf64_Half 
*) reloc_addr = PPC_HIGHEST (value); + break; + + case R_PPC64_ADDR16_HIGHERA: + *(Elf64_Half *) reloc_addr = PPC_HIGHERA (value); + break; + + case R_PPC64_ADDR16_HIGHESTA: + *(Elf64_Half *) reloc_addr = PPC_HIGHESTA (value); + break; + + case R_PPC64_ADDR14: + case R_PPC64_ADDR14_BRTAKEN: + case R_PPC64_ADDR14_BRNTAKEN: + { + if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0)) + _dl_reloc_overflow (map, "R_PPC64_ADDR14", reloc_addr, refsym); + Elf64_Word insn = *(Elf64_Word *) reloc_addr; + BIT_INSERT (insn, value, 0xfffc); + if (r_type != R_PPC64_ADDR14) + { + insn &= ~(1 << 21); + if (r_type == R_PPC64_ADDR14_BRTAKEN) + insn |= 1 << 21; + if ((insn & (0x14 << 21)) == (0x04 << 21)) + insn |= 0x02 << 21; + else if ((insn & (0x14 << 21)) == (0x10 << 21)) + insn |= 0x08 << 21; + } + *(Elf64_Word *) reloc_addr = insn; + } + break; + + case R_PPC64_REL32: + *(Elf64_Word *) reloc_addr = value - (Elf64_Addr) reloc_addr; + return; + + case R_PPC64_REL64: + *reloc_addr = value - (Elf64_Addr) reloc_addr; + return; +#endif /* !RTLD_BOOTSTRAP */ + + default: + _dl_reloc_bad_type (map, r_type, 0); + return; + } + MODIFIED_CODE_NOQUEUE (reloc_addr); +} + +auto inline void __attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) +{ + /* elf_machine_runtime_setup handles this. */ +} + + +#endif /* RESOLVE */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S new file mode 100644 index 0000000000..5ec729d1f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/dl-trampoline.S @@ -0,0 +1,500 @@ +/* PLT trampolines. PPC64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + + + .section ".text" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) need to be converted to an offset + (index * 24) in parm2 (r4). */ + +#define FRAME_SIZE (FRAME_MIN_SIZE+64) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; Use local var space rather than the parameter save area, + because gcc as of 2010/05 doesn't allocate a proper stack frame for + a function that makes no calls except for __tls_get_addr and we + might be here resolving the __tls_get_addr call. */ +#define INT_PARMS FRAME_MIN_SIZE +EALIGN(_dl_runtime_resolve, 4, 0) + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) + std r3,INT_PARMS+0(r1) + mr r3,r11 + std r4,INT_PARMS+8(r1) + sldi r4,r0,1 + std r5,INT_PARMS+16(r1) + add r4,r4,r0 + std r6,INT_PARMS+24(r1) + sldi r4,r4,3 + std r7,INT_PARMS+32(r1) + mflr r0 + std r8,INT_PARMS+40(r1) +/* Store the LR in the LR Save area. */ + std r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + bl JUMPTARGET(_dl_fixup) +#ifndef SHARED + nop +#endif +/* Put the registers back. 
*/ + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +#if _CALL_ELF == 2 +/* Restore the caller's TOC in case we jump to a local entry point. */ + ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) +#endif +/* Unwind the stack frame, and jump. */ + addi r1,r1,FRAME_SIZE + bctr +END(_dl_runtime_resolve) +#undef FRAME_SIZE +#undef INT_PARMS + + /* Stack layout: ELFv2 ABI. + +752 previous backchain + +744 spill_r31 + +736 spill_r30 + +720 v8 + +704 v7 + +688 v6 + +672 v5 + +656 v4 + +640 v3 + +624 v2 + +608 v1 + +600 fp10 + ELFv1 ABI +592 fp9 + +592 previous backchain +584 fp8 + +584 spill_r31 +576 fp7 + +576 spill_r30 +568 fp6 + +560 v1 +560 fp5 + +552 fp4 +552 fp4 + +544 fp3 +544 fp3 + +536 fp2 +536 fp2 + +528 fp1 +528 fp1 + +520 r4 +520 r4 + +512 r3 +512 r3 + return values + +504 free + +496 stackframe + +488 lr + +480 r1 + +464 v13 + +448 v12 + +432 v11 + +416 v10 + +400 v9 + +384 v8 + +368 v7 + +352 v6 + +336 v5 + +320 v4 + +304 v3 + +288 v2 + * VMX Parms in V2-V13, V0-V1 are scratch + +284 vrsave + +280 free + +272 fp13 + +264 fp12 + +256 fp11 + +248 fp10 + +240 fp9 + +232 fp8 + +224 fp7 + +216 fp6 + +208 fp5 + +200 fp4 + +192 fp3 + +184 fp2 + +176 fp1 + * FP Parms in FP1-FP13, FP0 is a scratch register + +168 r10 + +160 r9 + +152 r8 + +144 r7 + +136 r6 + +128 r5 + +120 r4 + +112 r3 + * Integer parms in R3-R10, R0 is scratch, R1 SP, R2 is TOC + +104 parm8 + +96 parm7 + +88 parm6 + +80 parm5 + +72 parm4 + +64 parm3 + +56 parm2 + +48 parm1 + * Parameter save area + * (v1 ABI: Allocated by the call, at least 8 double words) + +40 v1 ABI: TOC save area + +32 v1 ABI: Reserved for linker + +24 v1 ABI: Reserved for compiler / v2 ABI: TOC save area + +16 LR save area + +8 CR save area + r1+0 stack back chain + 
*/ +#if _CALL_ELF == 2 +# define FRAME_SIZE 752 +# define VR_RTN 608 +#else +# define FRAME_SIZE 592 +# define VR_RTN 560 +#endif +#define INT_RTN 512 +#define FPR_RTN 528 +#define STACK_FRAME 496 +#define CALLING_LR 488 +#define CALLING_SP 480 +#define INT_PARMS 112 +#define FPR_PARMS 176 +#define VR_PARMS 288 +#define VR_VRSAVE 284 + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" + + .machine "altivec" +/* On entry r0 contains the index of the PLT entry we need to fixup + and r11 contains the link_map (from PLT0+16). The link_map becomes + parm1 (r3) and the index (r0) needs to be converted to an offset + (index * 24) in parm2 (r4). */ +#ifndef PROF +EALIGN(_dl_profile_resolve, 4, 0) +/* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we + need to call _dl_call_pltexit. */ + std r31,-8(r1) + std r30,-16(r1) +/* We need to save the registers used to pass parameters, ie. r3 thru + r10; the registers are saved in a stack frame. */ + stdu r1,-FRAME_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_SIZE) + cfi_offset(r31,-8) + cfi_offset(r30,-16) + std r3,INT_PARMS+0(r1) + mr r3,r11 + std r4,INT_PARMS+8(r1) + sldi r4,r0,1 /* index * 2 */ + std r5,INT_PARMS+16(r1) + add r4,r4,r0 /* index * 3 */ + std r6,INT_PARMS+24(r1) + sldi r4,r4,3 /* index * 24 == PLT offset */ + mflr r5 + std r7,INT_PARMS+32(r1) + std r8,INT_PARMS+40(r1) +/* Store the LR in the LR Save area. */ + la r8,FRAME_SIZE(r1) + std r5,FRAME_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + std r5,CALLING_LR(r1) + std r9,INT_PARMS+48(r1) + std r10,INT_PARMS+56(r1) + std r8,CALLING_SP(r1) + ld r12,.LC__dl_hwcap@toc(r2) +#ifdef SHARED + /* Load _rtld_local_ro._dl_hwcap. */ + ld r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12) +#else + ld r12,0(r12) /* Load extern _dl_hwcap. */ +#endif + andis. 
r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(saveFP) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + li r11,32 + li r12,64 + stvx v2,0,r10 + stvx v3,0,r9 + + stvx v4,r11,r10 + stvx v5,r11,r9 + addi r11,r11,64 + + stvx v6,r12,r10 + stvx v7,r12,r9 + addi r12,r12,64 + + stvx v8,r11,r10 + stvx v9,r11,r9 + addi r11,r11,64 + + stvx v10,r12,r10 + stvx v11,r12,r9 + mfspr r0,VRSAVE + + stvx v12,r11,r10 + stvx v13,r11,r9 +L(saveFP): + stw r0,VR_VRSAVE(r1) +/* Save floating registers. */ + stfd fp1,FPR_PARMS+0(r1) + stfd fp2,FPR_PARMS+8(r1) + stfd fp3,FPR_PARMS+16(r1) + stfd fp4,FPR_PARMS+24(r1) + stfd fp5,FPR_PARMS+32(r1) + stfd fp6,FPR_PARMS+40(r1) + stfd fp7,FPR_PARMS+48(r1) + stfd fp8,FPR_PARMS+56(r1) + stfd fp9,FPR_PARMS+64(r1) + stfd fp10,FPR_PARMS+72(r1) + stfd fp11,FPR_PARMS+80(r1) + li r0,-1 + stfd fp12,FPR_PARMS+88(r1) + stfd fp13,FPR_PARMS+96(r1) +/* Load the extra parameters. */ + addi r6,r1,INT_PARMS + addi r7,r1,STACK_FRAME +/* Save link_map* and reloc_addr parms for later. */ + mr r31,r3 + mr r30,r4 + std r0,0(r7) + bl JUMPTARGET(_dl_profile_fixup) +#ifndef SHARED + nop +#endif +/* Test *framesizep > 0 to see if need to do pltexit processing. */ + ld r0,STACK_FRAME(r1) +/* Put the registers back. */ + lwz r12,VR_VRSAVE(r1) + cmpdi cr1,r0,0 + cmpdi cr0,r12,0 + bgt cr1,L(do_pltexit) + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) +/* VRSAVE must be non-zero if VMX is present and VRs are in use. 
*/ + beq L(restoreFXR) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. */ + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +#if _CALL_ELF == 2 +/* Restore the caller's TOC in case we jump to a local entry point. */ + ld r2,FRAME_SIZE+FRAME_TOC_SAVE(r1) +#endif +/* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Unwind the stack frame, and jump. */ + ld r31,FRAME_SIZE-8(r1) + ld r30,FRAME_SIZE-16(r1) + addi r1,r1,FRAME_SIZE + bctr + +L(do_pltexit): + la r10,(VR_PARMS+0)(r1) + la r9,(VR_PARMS+16)(r1) + beq L(restoreFXR2) + li r11,32 + li r12,64 + lvx v2,0,r10 + lvx v3,0,r9 + + lvx v4,r11,r10 + lvx v5,r11,r9 + addi r11,r11,64 + + lvx v6,r12,r10 + lvx v7,r12,r9 + addi r12,r12,64 + + lvx v8,r11,r10 + lvx v9,r11,r9 + addi r11,r11,64 + + lvx v10,r12,r10 + lvx v11,r12,r9 + + lvx v12,r11,r10 + lvx v13,r11,r9 +L(restoreFXR2): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r10,INT_PARMS+56(r1) + ld r9,INT_PARMS+48(r1) + ld r8,INT_PARMS+40(r1) + ld r7,INT_PARMS+32(r1) + mtlr r0 + ld r6,INT_PARMS+24(r1) + ld r5,INT_PARMS+16(r1) + ld r4,INT_PARMS+8(r1) +/* Prepare for calling the function returned by fixup. 
*/ + std r2,FRAME_TOC_SAVE(r1) + PPC64_LOAD_FUNCPTR r3 + ld r3,INT_PARMS+0(r1) +/* Load the floating point registers. */ + lfd fp1,FPR_PARMS+0(r1) + lfd fp2,FPR_PARMS+8(r1) + lfd fp3,FPR_PARMS+16(r1) + lfd fp4,FPR_PARMS+24(r1) + lfd fp5,FPR_PARMS+32(r1) + lfd fp6,FPR_PARMS+40(r1) + lfd fp7,FPR_PARMS+48(r1) + lfd fp8,FPR_PARMS+56(r1) + lfd fp9,FPR_PARMS+64(r1) + lfd fp10,FPR_PARMS+72(r1) + lfd fp11,FPR_PARMS+80(r1) + lfd fp12,FPR_PARMS+88(r1) + lfd fp13,FPR_PARMS+96(r1) +/* Call the target function. */ + bctrl + ld r2,FRAME_TOC_SAVE(r1) + lwz r12,VR_VRSAVE(r1) +/* But return here and store the return values. */ + std r3,INT_RTN(r1) + std r4,INT_RTN+8(r1) + stfd fp1,FPR_RTN+0(r1) + stfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 + la r10,VR_RTN(r1) + stfd fp3,FPR_RTN+16(r1) + stfd fp4,FPR_RTN+24(r1) +#if _CALL_ELF == 2 + la r12,VR_RTN+16(r1) + stfd fp5,FPR_RTN+32(r1) + stfd fp6,FPR_RTN+40(r1) + li r5,32 + li r6,64 + stfd fp7,FPR_RTN+48(r1) + stfd fp8,FPR_RTN+56(r1) + stfd fp9,FPR_RTN+64(r1) + stfd fp10,FPR_RTN+72(r1) +#endif + mr r3,r31 + mr r4,r30 + beq L(callpltexit) + stvx v2,0,r10 +#if _CALL_ELF == 2 + stvx v3,0,r12 + stvx v4,r5,r10 + stvx v5,r5,r12 + addi r5,r5,64 + stvx v6,r6,r10 + stvx v7,r6,r12 + stvx v8,r5,r10 + stvx v9,r5,r12 +#endif +L(callpltexit): + addi r5,r1,INT_PARMS + addi r6,r1,INT_RTN + bl JUMPTARGET(_dl_call_pltexit) +#ifndef SHARED + nop +#endif +/* Restore the return values from target function. 
*/ + lwz r12,VR_VRSAVE(r1) + ld r3,INT_RTN(r1) + ld r4,INT_RTN+8(r1) + lfd fp1,FPR_RTN+0(r1) + lfd fp2,FPR_RTN+8(r1) + cmpdi cr0,r12,0 + la r11,VR_RTN(r1) + lfd fp3,FPR_RTN+16(r1) + lfd fp4,FPR_RTN+24(r1) +#if _CALL_ELF == 2 + la r12,VR_RTN+16(r1) + lfd fp5,FPR_RTN+32(r1) + lfd fp6,FPR_RTN+40(r1) + li r30,32 + li r31,64 + lfd fp7,FPR_RTN+48(r1) + lfd fp8,FPR_RTN+56(r1) + lfd fp9,FPR_RTN+64(r1) + lfd fp10,FPR_RTN+72(r1) +#endif + beq L(pltexitreturn) + lvx v2,0,r11 +#if _CALL_ELF == 2 + lvx v3,0,r12 + lvx v4,r30,r11 + lvx v5,r30,r12 + addi r30,r30,64 + lvx v6,r31,r11 + lvx v7,r31,r12 + lvx v8,r30,r11 + lvx v9,r30,r12 +#endif +L(pltexitreturn): + ld r0,FRAME_SIZE+FRAME_LR_SAVE(r1) + ld r31,FRAME_SIZE-8(r1) + ld r30,FRAME_SIZE-16(r1) + mtlr r0 + ld r1,0(r1) + blr +END(_dl_profile_resolve) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h b/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h new file mode 100644 index 0000000000..9131d9ceb6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/entry.h @@ -0,0 +1,37 @@ +/* Finding the entry point and start of text. PowerPC64 version. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + + +#ifndef __ASSEMBLY__ +extern void _start (void); +#endif + +#define ENTRY_POINT _start + +#if _CALL_ELF != 2 +/* We have to provide a special declaration. */ +#define ENTRY_POINT_DECL(class) class void _start (void); + +/* Use the address of ._start as the lowest address for which we need + to keep profiling records. We can't copy the ia64 scheme as our + entry poiny address is really the address of the function + descriptor, not the actual function entry. */ +#define TEXT_START \ + ({ extern unsigned long int _start_as_data[] asm ("_start"); \ + _start_as_data[0]; }) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c b/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c new file mode 100644 index 0000000000..ae18f127a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/ffsll.c @@ -0,0 +1,37 @@ +/* Find first set bit in a word, counted from least significant end. + For PowerPC. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define ffsl __something_else +#include <limits.h> +#include <string.h> + +#undef ffs + +int +__ffsll (long long int x) +{ + int cnt; + + asm ("cntlzd %0,%1" : "=r" (cnt) : "r" (x & -x)); + return 64 - cnt; +} +weak_alias (__ffsll, ffsll) +#undef ffsl +weak_alias (__ffsll, ffsl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile new file mode 100644 index 0000000000..317a988854 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile @@ -0,0 +1,44 @@ +ifeq ($(subdir),math) +sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ + s_isnan-power5 s_isnan-ppc64 s_copysign-power6 \ + s_copysign-ppc64 s_finite-power7 s_finite-ppc64 \ + s_finitef-ppc64 s_isinff-ppc64 s_isinf-power7 \ + s_isinf-ppc64 s_modf-power5+ s_modf-ppc64 \ + s_modff-power5+ s_modff-ppc64 s_isnan-power8 \ + s_isinf-power8 s_finite-power8 + +libm-sysdep_routines += s_isnan-power7 s_isnan-power6x s_isnan-power6 \ + s_isnan-power5 s_isnan-ppc64 s_llround-power6x \ + s_llround-power5+ s_llround-ppc64 s_ceil-power5+ \ + s_ceil-ppc64 s_ceilf-power5+ s_ceilf-ppc64 \ + s_floor-power5+ s_floor-ppc64 s_floorf-power5+ \ + s_floorf-ppc64 s_round-power5+ s_round-ppc64 \ + s_roundf-power5+ s_roundf-ppc64 s_trunc-power5+ \ + s_trunc-ppc64 s_truncf-power5+ s_truncf-ppc64 \ + s_copysign-power6 s_copysign-ppc64 s_llrint-power6x \ + s_llrint-ppc64 s_finite-power7 s_finite-ppc64 \ + s_finitef-ppc64 s_isinff-ppc64 s_isinf-power7 \ + s_isinf-ppc64 s_logb-power7 s_logbf-power7 \ + s_logbl-power7 s_logb-ppc64 s_logbf-ppc64 \ + s_logbl-ppc64 s_modf-power5+ s_modf-ppc64 \ + s_modff-power5+ s_modff-ppc64 e_hypot-ppc64 \ + e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \ + s_isnan-power8 s_isinf-power8 s_finite-power8 \ + s_llrint-power8 s_llround-power8 \ + e_expf-power8 e_expf-ppc64 \ + s_sinf-ppc64 s_sinf-power8 \ + s_cosf-ppc64 s_cosf-power8 + +CFLAGS-s_logbf-power7.c = -mcpu=power7 
+CFLAGS-s_logbl-power7.c = -mcpu=power7 +CFLAGS-s_logb-power7.c = -mcpu=power7 +CFLAGS-s_modf-power5+.c = -mcpu=power5+ +CFLAGS-s_modff-power5+.c = -mcpu=power5+ +CFLAGS-e_hypot-power7.c = -mcpu=power7 +CFLAGS-e_hypotf-power7.c = -mcpu=power7 + +# These files quiet sNaNs in a way that is optimized away without +# -fsignaling-nans. +CFLAGS-s_modf-ppc64.c += -fsignaling-nans +CFLAGS-s_modff-ppc64.c += -fsignaling-nans +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S new file mode 100644 index 0000000000..1e6cc51d9e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-power8.S @@ -0,0 +1,26 @@ +/* __ieee754_expf() POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_expf __ieee754_expf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c new file mode 100644 index 0000000000..b236290ea2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf-ppc64.c @@ -0,0 +1,24 @@ +/* __ieee_expf() PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_expf __ieee754_expf_ppc64 + +#include <sysdeps/ieee754/flt-32/e_expf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c new file mode 100644 index 0000000000..577093675c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_expf.c @@ -0,0 +1,31 @@ +/* Multiple versions of ieee754_expf. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include "init-arch.h" + +extern __typeof (__ieee754_expf) __ieee754_expf_ppc64 attribute_hidden; +extern __typeof (__ieee754_expf) __ieee754_expf_power8 attribute_hidden; + +libc_ifunc (__ieee754_expf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __ieee754_expf_power8 + : __ieee754_expf_ppc64); + +strong_alias (__ieee754_expf, __expf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c new file mode 100644 index 0000000000..dbe9b33e2e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c @@ -0,0 +1,19 @@ +/* __ieee_hypot() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c new file mode 100644 index 0000000000..baebb36ae3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypot __ieee754_hypot_ppc64 + +#include <sysdeps/powerpc/fpu/e_hypot.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c new file mode 100644 index 0000000000..6a3d60a830 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypot. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc64 attribute_hidden; +extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypot, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __ieee754_hypot_power7 + : __ieee754_hypot_ppc64); + +strong_alias (__ieee754_hypot, __hypot_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c new file mode 100644 index 0000000000..70584863f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c @@ -0,0 +1,19 @@ +/* __ieee_hypotf() POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c new file mode 100644 index 0000000000..839e94e56c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c @@ -0,0 +1,26 @@ +/* __ieee_hypot() PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef strong_alias +#define strong_alias(a, b) + +#define __ieee754_hypotf __ieee754_hypotf_ppc64 + +#include <sysdeps/powerpc/fpu/e_hypotf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c new file mode 100644 index 0000000000..2c8112d3b2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ieee754_hypot. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc64 attribute_hidden; +extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden; + +libc_ifunc (__ieee754_hypotf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__ieee754_hypotf_power7 + : __ieee754_hypotf_ppc64); + +strong_alias (__ieee754_hypotf, __hypotf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S new file mode 100644 index 0000000000..bbea647da7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-power5+.S @@ -0,0 +1,31 @@ +/* ceil function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __ceil __ceil_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S new file mode 100644 index 0000000000..c19bb42329 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil-ppc64.S @@ -0,0 +1,31 @@ +/* ceil function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __ceil __ceil_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_ceil.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c new file mode 100644 index 0000000000..968e8cb17e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceil.c @@ -0,0 +1,40 @@ +/* Multiple versions of ceil. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceil) __ceil_ppc64 attribute_hidden; +extern __typeof (__ceil) __ceil_power5plus attribute_hidden; + +libc_ifunc (__ceil, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __ceil_power5plus + : __ceil_ppc64); + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +strong_alias (__ceil, __ceill) +weak_alias (__ceil, ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S new file mode 100644 index 0000000000..8e875ce679 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-power5+.S @@ -0,0 +1,26 @@ +/* ceilf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __ceilf __ceilf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S new file mode 100644 index 0000000000..c9d31da2a7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf-ppc64.S @@ -0,0 +1,26 @@ +/* ceilf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __ceilf __ceilf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_ceilf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf.c new file mode 100644 index 0000000000..7d4a028fe5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_ceilf.c @@ -0,0 +1,32 @@ +/* Multiple versions of ceilf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__ceilf) __ceilf_ppc64 attribute_hidden; +extern __typeof (__ceilf) __ceilf_power5plus attribute_hidden; + +libc_ifunc (__ceilf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __ceilf_power5plus + : __ceilf_ppc64); + +weak_alias (__ceilf, ceilf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S new file mode 100644 index 0000000000..d59fbfae73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-power6.S @@ -0,0 +1,33 @@ +/* copysign(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) +#undef hidden_def +#define hidden_def(name) + +#define __copysign __copysign_power6 + +#include <sysdeps/powerpc/powerpc64/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S new file mode 100644 index 0000000000..3f4fbc96ad --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign-ppc64.S @@ -0,0 +1,35 @@ +/* copysign(). PowerPC64 default version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a, b, c, d) + +#define __copysign __copysign_ppc64 +#undef hidden_def +#define hidden_def(name) \ + strong_alias (__copysign_ppc64, __GI___copysign) + +#include <sysdeps/powerpc/powerpc64/fpu/s_copysign.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c new file mode 100644 index 0000000000..2bfb625bf7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysign.c @@ -0,0 +1,51 @@ +/* Multiple versions of copysign. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Redefine copysign so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias below. 
*/ +#undef __copysign +#define __copysign __redirect_copysign +#include <math.h> +#include <math_ldbl_opt.h> +#undef __copysign +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__redirect_copysign) __copysign_ppc64 attribute_hidden; +extern __typeof (__redirect_copysign) __copysign_power6 attribute_hidden; + +extern __typeof (__redirect_copysign) __libm_copysign; +libc_ifunc (__libm_copysign, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc64); + +strong_alias (__libm_copysign, __copysign) +weak_alias (__copysign, copysign) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c new file mode 100644 index 0000000000..c9be2b6811 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_copysignf.c @@ -0,0 +1,32 @@ +/* Multiple versions of copysignf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +/* It's safe to use double-precision implementation for single-precision. */ +extern __typeof (__copysignf) __copysign_ppc64 attribute_hidden; +extern __typeof (__copysignf) __copysign_power6 attribute_hidden; + +libc_ifunc (__copysignf, + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __copysign_power6 + : __copysign_ppc64); + +weak_alias (__copysignf, copysignf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S new file mode 100644 index 0000000000..ee00a2c43a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-power8.S @@ -0,0 +1,26 @@ +/* cosf function. PowerPC64/power8 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __cosf __cosf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c new file mode 100644 index 0000000000..635624c538 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf-ppc64.c @@ -0,0 +1,26 @@ +/* cosf function. PowerPC64 default version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __cosf __cosf_ppc64 + +#include <sysdeps/powerpc/fpu/s_cosf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c new file mode 100644 index 0000000000..acf2a59d69 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_cosf.c @@ -0,0 +1,31 @@ +/* Multiple versions of cosf. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__cosf) __cosf_ppc64 attribute_hidden; +extern __typeof (__cosf) __cosf_power8 attribute_hidden; + +libc_ifunc (__cosf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __cosf_power8 + : __cosf_ppc64); + +weak_alias (__cosf, cosf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S new file mode 100644 index 0000000000..9220383ee6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power7.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __finite __finite_power7 + +#include <sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S new file mode 100644 index 0000000000..fa878ab3e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-power8.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __finite __finite_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c new file mode 100644 index 0000000000..fabd9b0e3d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite-ppc64.c @@ -0,0 +1,34 @@ +/* finite(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define FINITE __finite_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finite_ppc64, __GI___finite, __finite_ppc64); +#endif + +#include <sysdeps/ieee754/dbl-64/s_finite.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c new file mode 100644 index 0000000000..a5ec36b72f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finite.c @@ -0,0 +1,60 @@ +/* Multiple versions of finite. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finite __redirect___finite +#define __finitef __redirect___finitef +#define __finitel __redirect___finitel +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finite) __finite_ppc64 attribute_hidden; +extern __typeof (__finite) __finite_power7 attribute_hidden; +extern __typeof (__finite) __finite_power8 attribute_hidden; +#undef __finite +#undef __finitef +#undef __finitel + +libc_ifunc_redirected (__redirect___finite, __finite, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? 
__finite_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __finite_power7 + : __finite_ppc64); + +weak_alias (__finite, finite) + +#ifdef NO_LONG_DOUBLE +strong_alias (__finite, __finitel) +weak_alias (__finite, finitel) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0); +# endif +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) +compat_symbol (libm, __finite, __finitel, GLIBC_2_1); +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c new file mode 100644 index 0000000000..c6f8033e6e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef-ppc64.c @@ -0,0 +1,32 @@ +/* finitef(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define FINITEF __finitef_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__finitef_ppc64, __GI___finitef, __finitef_ppc64); +#endif + +#include <sysdeps/ieee754/flt-32/s_finitef.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c new file mode 100644 index 0000000000..cdd7824efb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_finitef.c @@ -0,0 +1,37 @@ +/* Multiple versions of finitef. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __finitef __redirect___finitef +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__finitef) __finitef_ppc64 attribute_hidden; +/* The double-precision version also works for single-precision. */ +extern __typeof (__finitef) __finite_power7 attribute_hidden; +extern __typeof (__finitef) __finite_power8 attribute_hidden; +#undef __finitef + +libc_ifunc_redirected (__redirect___finitef, __finitef, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __finite_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__finite_power7 + : __finitef_ppc64); + +weak_alias (__finitef, finitef) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S new file mode 100644 index 0000000000..24f2460693 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-power5+.S @@ -0,0 +1,31 @@ +/* floor function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __floor __floor_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S new file mode 100644 index 0000000000..5ec9a33d89 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor-ppc64.S @@ -0,0 +1,31 @@ +/* floor function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __floor __floor_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_floor.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c new file mode 100644 index 0000000000..6ab7a35490 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floor.c @@ -0,0 +1,40 @@ +/* Multiple versions of floor. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floor) __floor_ppc64 attribute_hidden; +extern __typeof (__floor) __floor_power5plus attribute_hidden; + +libc_ifunc (__floor, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floor_power5plus + : __floor_ppc64); + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +strong_alias (__floor, __floorl) +weak_alias (__floor, floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S new file mode 100644 index 0000000000..8b621de68e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-power5+.S @@ -0,0 +1,26 @@ +/* floorf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __floorf __floorf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S new file mode 100644 index 0000000000..3feea6e162 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf-ppc64.S @@ -0,0 +1,27 @@ +/* floorf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __floorf __floorf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_floorf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c new file mode 100644 index 0000000000..ee96536247 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_floorf.c @@ -0,0 +1,32 @@ +/* Multiple versions of floorf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__floorf) __floorf_ppc64 attribute_hidden; +extern __typeof (__floorf) __floorf_power5plus attribute_hidden; + +libc_ifunc (__floorf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __floorf_power5plus + : __floorf_ppc64); + +weak_alias (__floorf, floorf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S new file mode 100644 index 0000000000..33a7e3de1a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power7.S @@ -0,0 +1,33 @@ +/* isinf(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __isinf __isinf_power7 + +#include <sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S new file mode 100644 index 0000000000..b630696927 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-power8.S @@ -0,0 +1,33 @@ +/* isinf(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __isinf __isinf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c new file mode 100644 index 0000000000..e7f64438b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf-ppc64.c @@ -0,0 +1,33 @@ +/* isinf(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __isinf __isinf_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinf_ppc64, __GI___isinf, __isinf_ppc64); +#endif + +#include <sysdeps/ieee754/dbl-64/s_isinf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c new file mode 100644 index 0000000000..9c6789c7bd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinf.c @@ -0,0 +1,53 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinf __redirect___isinf +#define __isinff __redirect___isinff +#define __isinfl __redirect___isinfl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinf) __isinf_ppc64 attribute_hidden; +extern __typeof (__isinf) __isinf_power7 attribute_hidden; +extern __typeof (__isinf) __isinf_power8 attribute_hidden; +#undef __isinf +#undef __isinff +#undef __isinfl + +libc_ifunc_redirected (__redirect___isinf, __isinf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? 
__isinf_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isinf_power7 + : __isinf_ppc64); + +weak_alias (__isinf, isinf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c new file mode 100644 index 0000000000..e58e0b53be --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff-ppc64.c @@ -0,0 +1,31 @@ +/* isinff(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __isinff __isinff_ppc64 +#ifdef SHARED +# undef hidden_def +# define hidden_def(a) \ + __hidden_ver1 (__isinff_ppc64, __GI___isinff, __isinff_ppc64); +#endif + +#include <sysdeps/ieee754/flt-32/s_isinff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c new file mode 100644 index 0000000000..439e0b80d1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isinff.c @@ -0,0 +1,38 @@ +/* Multiple versions of isinf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define __isinff __redirect___isinff +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isinff) __isinff_ppc64 attribute_hidden; +/* The double-precision version also works for single-precision. */ +extern __typeof (__isinff) __isinf_power7 attribute_hidden; +extern __typeof (__isinff) __isinf_power8 attribute_hidden; +#undef __isinff + +libc_ifunc_redirected (__redirect___isinff, __isinff, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isinf_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__isinf_power7 + : __isinff_ppc64); + +weak_alias (__isinff, isinff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S new file mode 100644 index 0000000000..18d368a63b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power5.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER5 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power5 + +#include <sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S new file mode 100644 index 0000000000..7f0eae0430 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER6 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power6 + +#include <sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S new file mode 100644 index 0000000000..aa283096ae --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power6x.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER6X version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S new file mode 100644 index 0000000000..b67d58e2ea --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power7.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power7 + +#include <sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S new file mode 100644 index 0000000000..03151b3087 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-power8.S @@ -0,0 +1,33 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef hidden_def +#define hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, symbol, ver) + +#define __isnan __isnan_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S new file mode 100644 index 0000000000..ee219c14be --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan-ppc64.S @@ -0,0 +1,32 @@ +/* isnan(). PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) + +#define __isnan __isnan_ppc64 +#undef hidden_def +#define hidden_def(name) \ + .globl __GI___isnan ; .set __GI___isnan,__isnan_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_isnan.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c new file mode 100644 index 0000000000..3cfe1793da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnan.c @@ -0,0 +1,62 @@ +/* Multiple versions of isnan. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __isnan __redirect___isnan +#define __isnanf __redirect___isnanf +#define __isnanl __redirect___isnanl +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__isnan) __isnan_ppc64 attribute_hidden; +extern __typeof (__isnan) __isnan_power5 attribute_hidden; +extern __typeof (__isnan) __isnan_power6 attribute_hidden; +extern __typeof (__isnan) __isnan_power6x attribute_hidden; +extern __typeof (__isnan) __isnan_power7 attribute_hidden; +extern __typeof (__isnan) __isnan_power8 attribute_hidden; +#undef __isnan +#undef __isnanf +#undef __isnanl + +libc_ifunc_redirected (__redirect___isnan, __isnan, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_POWER6_EXT) + ? __isnan_power6x + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 + : __isnan_ppc64); + +weak_alias (__isnan, isnan) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c new file mode 100644 index 0000000000..958c373245 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_isnanf.c @@ -0,0 +1,44 @@ +/* Multiple versions of isnan. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +/* The double-precision implementation also works for the single one. */ +extern __typeof (__isnanf) __isnan_ppc64 attribute_hidden; +extern __typeof (__isnanf) __isnan_power5 attribute_hidden; +extern __typeof (__isnanf) __isnan_power6 attribute_hidden; +extern __typeof (__isnanf) __isnan_power6x attribute_hidden; +extern __typeof (__isnanf) __isnan_power7 attribute_hidden; +extern __typeof (__isnanf) __isnan_power8 attribute_hidden; + +libc_ifunc_hidden (__isnanf, __isnanf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __isnan_power8 + : (hwcap & PPC_FEATURE_ARCH_2_06) + ? __isnan_power7 + : (hwcap & PPC_FEATURE_POWER6_EXT) + ? __isnan_power6x + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __isnan_power6 + : (hwcap & PPC_FEATURE_POWER5) + ? __isnan_power5 + : __isnan_ppc64); + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S new file mode 100644 index 0000000000..f9b1616be3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power6x.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64/POWER6X default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S new file mode 100644 index 0000000000..b7f5276a66 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-power8.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC64/POWER6X default version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S new file mode 100644 index 0000000000..b92dafbcdb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint-ppc64.S @@ -0,0 +1,31 @@ +/* Round double to long int. PowerPC32 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llrint __llrint_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_llrint.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c new file mode 100644 index 0000000000..8db494cfde --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llrint.c @@ -0,0 +1,60 @@ +/* Multiple versions of llrint. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Redefine lrint/__lrint so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias below. */ +#define lrint __hidden_lrint +#define __lrint __hidden___lrint + +#include <math.h> +#include <math_ldbl_opt.h> +#undef lrint +#undef __lrint +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llrint) __llrint_ppc64 attribute_hidden; +extern __typeof (__llrint) __llrint_power6x attribute_hidden; +extern __typeof (__llrint) __llrint_power8 attribute_hidden; + +libc_ifunc (__llrint, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? 
__llrint_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llrint_power6x + : __llrint_ppc64); + +weak_alias (__llrint, llrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1); +#endif + +/* long has the same width as long long on PowerPC64. */ +strong_alias (__llrint, __lrint) +weak_alias (__lrint, lrint) +#ifdef NO_LONG_DOUBLE +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S new file mode 100644 index 0000000000..b8305ce968 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power5+.S @@ -0,0 +1,32 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_power5plus +#define __lround __lround_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S new file mode 100644 index 0000000000..1145aff2a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power6x.S @@ -0,0 +1,32 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power6x +#define __lround __lround_power6x + +#include <sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S new file mode 100644 index 0000000000..8d6190df89 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-power8.S @@ -0,0 +1,31 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(name, alias) +#undef strong_alias +#define strong_alias(name, alias) +#undef compat_symbol +#define compat_symbol(lib, name, alias, ver) + +#define __llround __llround_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S new file mode 100644 index 0000000000..8e6f9aed78 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround-ppc64.S @@ -0,0 +1,28 @@ +/* llround(). PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __llround __llround_ppc64 +#define __lround __lround_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_llround.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c new file mode 100644 index 0000000000..cb1a446158 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_llround.c @@ -0,0 +1,63 @@ +/* Multiple versions of llround. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define lround __hidden_lround +#define __lround __hidden___lround + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__llround) __llround_ppc64 attribute_hidden; +extern __typeof (__llround) __llround_power5plus attribute_hidden; +extern __typeof (__llround) __llround_power6x attribute_hidden; +extern __typeof (__llround) __llround_power8 attribute_hidden; + +libc_ifunc (__llround, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __llround_power8 : + (hwcap & PPC_FEATURE_POWER6_EXT) + ? __llround_power6x : + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__llround_power5plus + : __llround_ppc64); + +weak_alias (__llround, llround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1); +compat_symbol (libm, llround, lroundl, GLIBC_2_1); +#endif + +/* long has the same width as long long on PPC64. */ +#undef lround +#undef __lround +strong_alias (__llround, __lround) +weak_alias (__llround, lround) +#ifdef NO_LONG_DOUBLE +strong_alias (__llround, __llroundl) +weak_alias (__llround, llroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c new file mode 100644 index 0000000000..73b5e2d48c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-power7.c @@ -0,0 +1,19 @@ +/* logb(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logb-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c new file mode 100644 index 0000000000..e428b9a29a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb-ppc64.c @@ -0,0 +1,28 @@ +/* logb(). PowerPC32/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) +#undef strong_alias +#define strong_alias(a, b) + +#define __logb __logb_ppc64 + +#include <sysdeps/ieee754/dbl-64/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c new file mode 100644 index 0000000000..d70919e3d4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logb.c @@ -0,0 +1,41 @@ +/* Multiple versions of logb. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logb) __logb_ppc64 attribute_hidden; +extern __typeof (__logb) __logb_power7 attribute_hidden; + +libc_ifunc (__logb, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logb_power7 + : __logb_ppc64); + +weak_alias (__logb, logb) + +#ifdef NO_LONG_DOUBLE +strong_alias (__logb, __logbl) +weak_alias (__logb, logbl) +#endif + +#if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, logb, logbl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c new file mode 100644 index 0000000000..02e04318e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-power7.c @@ -0,0 +1,19 @@ +/* logb(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbf-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c new file mode 100644 index 0000000000..147b710c73 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf-ppc64.c @@ -0,0 +1,26 @@ +/* logbf(). PowerPC64 default implementation. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __logbf __logbf_ppc64 + +#include <sysdeps/ieee754/flt-32/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c new file mode 100644 index 0000000000..1cacc8a950 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbf.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbf) __logbf_ppc64 attribute_hidden; +extern __typeof (__logbf) __logbf_power7 attribute_hidden; + +libc_ifunc (__logbf, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __logbf_power7 + : __logbf_ppc64); + +weak_alias (__logbf, logbf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c new file mode 100644 index 0000000000..60ec533b8e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-power7.c @@ -0,0 +1,19 @@ +/* logb(). PowerPC64/POWER7 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_logbl-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c new file mode 100644 index 0000000000..502410f877 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl-ppc64.c @@ -0,0 +1,21 @@ +/* logbl(). PowerPC64/POWER7 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define __logbl __logbl_ppc64 + +#include <sysdeps/ieee754/ldbl-128ibm/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c new file mode 100644 index 0000000000..63b9c812e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_logbl.c @@ -0,0 +1,32 @@ +/* Multiple versions of logbl. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__logbl) __logbl_ppc64 attribute_hidden; +extern __typeof (__logbl) __logbl_power7 attribute_hidden; + +libc_ifunc (__logbl, + (hwcap & PPC_FEATURE_ARCH_2_06) + ? 
__logbl_power7 + : __logbl_ppc64); + +long_double_symbol (libm, __logbl, logbl); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c new file mode 100644 index 0000000000..d09286267b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lrint.c @@ -0,0 +1 @@ + /* __lrint is in s_llrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c new file mode 100644 index 0000000000..0dab5443e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_lround.c @@ -0,0 +1 @@ +/* __lround is in s_llround.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c new file mode 100644 index 0000000000..c923f84d97 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-power5+.c @@ -0,0 +1,19 @@ +/* PowerPC/POWER5+ implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modf-power5+.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c new file mode 100644 index 0000000000..43318ee4dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf-ppc64.c @@ -0,0 +1,29 @@ +/* PowerPC64 default implementation for modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) + +#define __modf __modf_ppc64 + +#include <sysdeps/ieee754/dbl-64/s_modf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c new file mode 100644 index 0000000000..3e79b2bd5a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modf.c @@ -0,0 +1,44 @@ +/* Multiple versions of modf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__modf) __modf_ppc64 attribute_hidden; +extern __typeof (__modf) __modf_power5plus attribute_hidden; + +libc_ifunc (__modf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modf_power5plus + : __modf_ppc64); + +weak_alias (__modf, modf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__modf, __modfl) +weak_alias (__modf, modfl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __modf, modfl, GLIBC_2_0); +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __modf, modfl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c new file mode 100644 index 0000000000..22dbf5341e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-power5+.c @@ -0,0 +1,19 @@ +/* PowerPC/POWER5+ implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_modff-power5+.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c new file mode 100644 index 0000000000..6fc97f0114 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff-ppc64.c @@ -0,0 +1,26 @@ +/* PowerPC64 default implementation for modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __modff __modff_ppc64 + +#include <sysdeps/ieee754/flt-32/s_modff.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c new file mode 100644 index 0000000000..f57939cc66 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_modff.c @@ -0,0 +1,30 @@ +/* Multiple versions of modff. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include "init-arch.h" + +extern __typeof (__modff) __modff_ppc64 attribute_hidden; +extern __typeof (__modff) __modff_power5plus attribute_hidden; + +libc_ifunc (__modff, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __modff_power5plus + : __modff_ppc64); + +weak_alias (__modff, modff) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S new file mode 100644 index 0000000000..a7c7492f21 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-power5+.S @@ -0,0 +1,31 @@ +/* round function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S new file mode 100644 index 0000000000..44a2b0105a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round-ppc64.S @@ -0,0 +1,31 @@ +/* round function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __round __round_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_round.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c new file mode 100644 index 0000000000..d440f6f45c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_round.c @@ -0,0 +1,40 @@ +/* Multiple versions of round. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__round) __round_ppc64 attribute_hidden; +extern __typeof (__round) __round_power5plus attribute_hidden; + +libc_ifunc (__round, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__round_power5plus + : __round_ppc64); + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +strong_alias (__round, __roundl) +weak_alias (__round, roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __round, roundl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S new file mode 100644 index 0000000000..81501a1547 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-power5+.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S new file mode 100644 index 0000000000..8f3b24c556 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf-ppc64.S @@ -0,0 +1,26 @@ +/* roundf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __roundf __roundf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_roundf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c new file mode 100644 index 0000000000..09609d3e91 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_roundf.c @@ -0,0 +1,32 @@ +/* Multiple versions of roundf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__roundf) __roundf_ppc64 attribute_hidden; +extern __typeof (__roundf) __roundf_power5plus attribute_hidden; + +libc_ifunc (__roundf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __roundf_power5plus + : __roundf_ppc64); + +weak_alias (__roundf, roundf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S new file mode 100644 index 0000000000..3d01533da8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-power8.S @@ -0,0 +1,26 @@ +/* sinf(). PowerPC64/POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __sinf __sinf_power8 + +#include <sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c new file mode 100644 index 0000000000..83e37f92c6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf-ppc64.c @@ -0,0 +1,26 @@ +/* sinf(). PowerPC64 default version. 
+ Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a, b) + +#define __sinf __sinf_ppc64 + +#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c new file mode 100644 index 0000000000..6d7d6ce50d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_sinf.c @@ -0,0 +1,31 @@ +/* Multiple versions of sinf. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__sinf) __sinf_ppc64 attribute_hidden; +extern __typeof (__sinf) __sinf_power8 attribute_hidden; + +libc_ifunc (__sinf, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __sinf_power8 + : __sinf_ppc64); + +weak_alias (__sinf, sinf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S new file mode 100644 index 0000000000..53d8cd5013 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-power5+.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S new file mode 100644 index 0000000000..36e8fd05c2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc-ppc64.S @@ -0,0 +1,31 @@ +/* trunc function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +#undef weak_alias +#define weak_alias(a,b) +#undef strong_alias +#define strong_alias(a,b) +#undef compat_symbol +#define compat_symbol(a,b,c,d) + +#define __trunc __trunc_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_trunc.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..54844d5ff2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_trunc.c @@ -0,0 +1,40 @@ +/* Multiple versions of trunc. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__trunc) __trunc_ppc64 attribute_hidden; +extern __typeof (__trunc) __trunc_power5plus attribute_hidden; + +libc_ifunc (__trunc, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? 
__trunc_power5plus + : __trunc_ppc64); + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +strong_alias (__trunc, __truncl) +weak_alias (__trunc, truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __trunc, truncl, GLIBC_2_0); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S new file mode 100644 index 0000000000..e28de7cb1e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-power5+.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC64/power5+ version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_power5plus + +#include <sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S new file mode 100644 index 0000000000..b60242d83b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf-ppc64.S @@ -0,0 +1,26 @@ +/* truncf function. PowerPC64 default version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#undef weak_alias +#define weak_alias(a,b) + +#define __truncf __truncf_ppc64 + +#include <sysdeps/powerpc/powerpc64/fpu/s_truncf.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..2c46525235 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/multiarch/s_truncf.c @@ -0,0 +1,32 @@ +/* Multiple versions of truncf. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> +#include <math_ldbl_opt.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__truncf) __truncf_ppc64 attribute_hidden; +extern __typeof (__truncf) __truncf_power5plus attribute_hidden; + +libc_ifunc (__truncf, + (hwcap & PPC_FEATURE_POWER5_PLUS) + ? __truncf_power5plus + : __truncf_ppc64); + +weak_alias (__truncf, truncf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S new file mode 100644 index 0000000000..78d7feefed --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceil.S @@ -0,0 +1,72 @@ +/* ceil function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__ceil, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. 
*/ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S new file mode 100644 index 0000000000..bc5ab02cb0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S @@ -0,0 +1,66 @@ +/* float ceil function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__ceilf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,2 /* Set rounding mode toward +inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S new file mode 100644 index 0000000000..59472816c7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysign.S @@ -0,0 +1,59 @@ +/* Copy a sign bit between floating-point values. PowerPC64 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysign) + CALL_MCOUNT 0 +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stfd fp2,-8(r1) + nop + nop + nop + ld r3,-8(r1) + cmpdi r3,0 + blt L(0) + fabs fp1,fp1 + blr +L(0): fnabs fp1,fp1 + blr + END (__copysign) + +weak_alias (__copysign,copysign) + +/* It turns out that it's safe to use this code even for single-precision. 
*/ +weak_alias (__copysign,copysignf) +strong_alias(__copysign,__copysignf) + +#ifdef NO_LONG_DOUBLE +weak_alias (__copysign,copysignl) +strong_alias(__copysign,__copysignl) +#endif +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __copysign, copysignl, GLIBC_2_0) +# endif +#elif LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __copysign, copysignl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S new file mode 100644 index 0000000000..e05438ae7d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* __copysignf is in s_copysign.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S new file mode 100644 index 0000000000..b33ea6e256 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_copysignl.S @@ -0,0 +1,48 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__copysignl) +/* long double [f1,f2] copysign (long double [f1,f2] x, long double [f3,f4] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + stfd fp3,-16(r1) + ld r3,-16(r1) + cmpdi r3,0 + blt L(0) + fmr fp0,fp1 + fabs fp1,fp1 + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 + blr +L(0): + fmr fp0,fp1 + fnabs fp1,fp1 + fneg fp3,fp2 + fsel fp2,fp0,fp3,fp2 + blr +END (__copysignl) + +#if IS_IN (libm) +long_double_symbol (libm, __copysignl, copysignl) +#else +long_double_symbol (libc, __copysignl, copysignl) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S new file mode 100644 index 0000000000..53d21301ee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabs.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fabs.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __fabs, fabsl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S new file mode 100644 index 0000000000..7603abba5d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fabsl.S @@ -0,0 +1,34 @@ +/* Copy a sign bit between floating-point values. + IBM extended format long double version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fabsl) +/* long double [f1,f2] fabs (long double [f1,f2] x); + fabs(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + fmr fp0,fp1 + fabs fp1,fp1 + fneg fp3,fp2 + fsel fp2,fp0,fp2,fp3 + blr +END (__fabsl) + +long_double_symbol (libm, __fabsl, fabsl) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S new file mode 100644 index 0000000000..4a6cc0ebba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floor.S @@ -0,0 +1,72 @@ +/* Floor function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__floor, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. 
*/ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S new file mode 100644 index 0000000000..d8b5e21248 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_floorf.S @@ -0,0 +1,66 @@ +/* float Floor function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__floorf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,3 /* Set rounding mode toward -inf. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S new file mode 100644 index 0000000000..d40695c633 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_fma.S @@ -0,0 +1,5 @@ +#include <math_ldbl_opt.h> +#include <sysdeps/powerpc/fpu/s_fma.S> +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fma, fmal, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S new file mode 100644 index 0000000000..6cba2d4408 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_isnan.S @@ -0,0 +1,56 @@ +/* isnan(). PowerPC64 version. 
+ Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power4 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + mffs fp0 + mtfsb0 4*cr6+lt /* reset_fpscr_bit (FPSCR_VE) */ + fcmpu cr7,fp1,fp1 + mtfsf 255,fp0 + li r3,0 + beqlr+ cr7 /* (x == x) then not a NAN */ + li r3,1 /* else must be a NAN */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S new file mode 100644 index 0000000000..39e765434a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrint.S @@ -0,0 +1,47 @@ +/* Round double to long int. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp13,fp1 + stfd fp13,-16(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + ld r3,-16(r1) + blr + END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S new file mode 100644 index 0000000000..4050be6437 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llrintf.S @@ -0,0 +1,36 @@ +/* Round double to long int. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* long long int[r3] __llrintf (float x[fp1]) */ +ENTRY (__llrintf) + CALL_MCOUNT 0 + fctid fp13,fp1 + stfd fp13,-16(r1) + nop /* Insure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + ld r3,-16(r1) + blr + END (__llrintf) + +strong_alias (__llrintf, __lrintf) +weak_alias (__llrintf, llrintf) +weak_alias (__lrintf, lrintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S new file mode 100644 index 0000000000..0803ba1eb3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llround.S @@ -0,0 +1,96 @@ +/* llround function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2^52 */ + .tc FD_43300000_0[TC],0x4330000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 + .section ".text" + +/* long long [r3] llround (double x [fp1]) + IEEE 1003.1 llround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. + + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^52 and 2^53-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^52 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llround) + CALL_MCOUNT 0 + lfd fp9,.LC0@toc(2) /* Load 2^52 into fpr9. */ + lfd fp10,.LC1@toc(2)/* Load 0.5 into fpr10. */ + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fp12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^52 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^52 > x < -2^52 just convert with no bias. */ + /* Test whether an integer to avoid spurious "inexact". 
*/ + fadd fp3,fp2,fp9 + fsub fp3,fp3,fp9 + fcmpu cr5,fp2,fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,-16(r1) + nop + nop + nop + ld r3,-16(r1) /* Load return as integer. */ +.Lout: + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. */ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S new file mode 100644 index 0000000000..3e910ac322 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_llroundf.S @@ -0,0 +1,88 @@ +/* llroundf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" +.LC0: /* 2^23 */ + .tc FD_41600000_0[TC],0x4160000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 +.LC2: /* 2^52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +/* long long [r3] llroundf (float x [fp1]) + IEEE 1003.1 llroundf function. IEEE specifies "roundf to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "roundf to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "round to Nearest" mode. Instead we set + "round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. + + It is necessary to detect when x is (+-)0x1.fffffffffffffp-2 + because adding +-0.5 in this case will cause an erroneous shift, + carry and round. We simply return 0 if 0.5 > x > -0.5. Likewise + if x is and odd number between +-(2^23 and 2^24-1) a shift and + carry will erroneously round if biased with +-0.5. Therefore if x + is greater/less than +-2^23 we don't need to bias the number with + +-0.5. */ + +ENTRY (__llroundf) + CALL_MCOUNT 0 + lfd fp9,.LC0@toc(2) /* Load 2^23 into fpr9. */ + lfd fp10,.LC1@toc(2)/* Load 0.5 into fpr10. */ + lfd fp11,.LC2@toc(2) /* Load 2^52 into fpr11. */ + fabs fp2,fp1 /* Get the absolute value of x. */ + fsub fp12,fp10,fp10 /* Compute 0.0 into fp12. */ + fcmpu cr6,fp2,fp10 /* if |x| < 0.5 */ + fcmpu cr7,fp2,fp9 /* if |x| >= 2^23 */ + fcmpu cr1,fp1,fp12 /* x is negative? x < 0.0 */ + blt- cr6,.Lretzero /* 0.5 > x < -0.5 so just return 0. */ + bge- cr7,.Lnobias /* 2^23 > x < -2^23 just convert with no bias. */ + /* Test whether an integer to avoid spurious "inexact". 
*/ + fadd fp3,fp2,fp11 + fsub fp3,fp3,fp11 + fcmpu cr5,fp2,fp3 + beq cr5,.Lnobias + fadd fp3,fp2,fp10 /* |x|+=0.5 bias to prepare to round. */ + bge cr1,.Lconvert /* x is positive so don't negate x. */ + fnabs fp3,fp3 /* -(|x|+=0.5) */ +.Lconvert: + fctidz fp4,fp3 /* Convert to Integer double word round toward 0. */ + stfd fp4,-16(r1) + nop + nop + nop + ld r3,-16(r1) /* Load return as integer. */ +.Lout: + blr +.Lretzero: /* 0.5 > x > -0.5 */ + li r3,0 /* return 0. */ + b .Lout +.Lnobias: + fmr fp3,fp1 + b .Lconvert + END (__llroundf) + +strong_alias (__llroundf, __lroundf) +weak_alias (__llroundf, llroundf) +weak_alias (__lroundf, lroundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S new file mode 100644 index 0000000000..d3c2fff581 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lrint.S @@ -0,0 +1 @@ +/* __lrint is in s_llrint.c */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S new file mode 100644 index 0000000000..4306c405c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lround.S @@ -0,0 +1 @@ +/* __lround is in s_llround.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S new file mode 100644 index 0000000000..6b2a4e37a6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_lroundf.S @@ -0,0 +1 @@ +/* __lroundf is in s_llroundf.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S new file mode 100644 index 0000000000..3dcd04b1f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyint.S @@ -0,0 +1,75 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + +/* double [fp1] nearbyint(double [fp1] x) */ + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__nearbyint, 4, 0) + CALL_MCOUNT 0 + fabs fp0,fp1 + lfd fp13,.LC0@toc(2) + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + bge cr7,.L10 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7, L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadd fp1,fp1,fp13 /* x+= TWO52 */ + fsub fp1,fp1,fp13 /* x-= TWO52 */ + fabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = 0.0; */ +L(lessthanzero): + bgelr cr7 /* if (x < 0.0) */ + mffs fp11 + mtfsb0 4*cr7+lt + fsub fp1,fp1,fp13 /* x -= TWO52 */ + fadd fp1,fp1,fp13 /* x += TWO52 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. 
*/ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr +END (__nearbyint) + +weak_alias (__nearbyint, nearbyint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__nearbyint, nearbyint) +strong_alias (__nearbyint, __nearbyintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S new file mode 100644 index 0000000000..11be35f94e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S @@ -0,0 +1,68 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011 + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. 
*/ + +#include <sysdep.h> + + +/* float [fp1] nearbyintf(float [fp1]) */ + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__nearbyintf, 4, 0) + CALL_MCOUNT 0 + fabs fp0,fp1 + lfs fp13,.LC0@toc(2) + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + bge cr7,.L10 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp1,fp12 /* if (x > 0.0) */ + ble cr7, L(lessthanzero) + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = 0.0; */ +L(lessthanzero): + bgelr cr7 /* if (x < 0.0) */ + mffs fp11 + mtfsb0 4*cr7+lt /* Disable FE_INEXACT exception */ + fsubs fp1,fp1,fp13 /* x -= TWO23 */ + fadds fp1,fp1,fp13 /* x += TWO23 */ + fnabs fp1,fp1 /* if (x == 0.0) */ + mtfsf 0xff,fp11 /* Restore FE_INEXACT state. */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr +END (__nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S new file mode 100644 index 0000000000..7ba0adff84 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rint.S @@ -0,0 +1,65 @@ +/* Round to int floating-point values. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This has been coded in assembler because GCC makes such a mess of it + when it's coded in C. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +EALIGN (__rint, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__rint) + +weak_alias (__rint, rint) + +#ifdef NO_LONG_DOUBLE +weak_alias (__rint, rintl) +strong_alias (__rint, __rintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __rint, rintl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S new file mode 100644 index 0000000000..b1d1e158c0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_rintf.S @@ -0,0 +1,56 @@ +/* Round float to int floating-point values. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +EALIGN (__rintf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl cr7,.L10 + bng cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = 0.0; */ +.L4: + bnllr cr6 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + blr /* x = -0.0; */ +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__rintf) + +weak_alias (__rintf, rintf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S new file mode 100644 index 0000000000..fe315af51d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_round.S @@ -0,0 +1,87 @@ +/* round function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 +.LC1: /* 0.5 */ + .tc FD_3fe00000_0[TC],0x3fe0000000000000 + .section ".text" + +/* double [fp1] round (double x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + +EALIGN (__round, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. 
*/ + lfd fp10,.LC1@toc(2) + ble- cr6,.L4 + fadd fp1,fp1,fp10 /* x+= 0.5; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + fsub fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp9,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__round) + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +weak_alias (__round, roundl) +strong_alias (__round, __roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S new file mode 100644 index 0000000000..d213f43566 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_roundf.S @@ -0,0 +1,81 @@ +/* roundf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 +.LC1: /* 0.5 */ + .long 0x3f000000 + + .section ".text" + +/* float [fp1] roundf (float x [fp1]) + IEEE 1003.1 round function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "Round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we can't use the PowerPC "Round to Nearest" mode. Instead we set + "Round toward Zero" mode and round by adding +-0.5 before rounding + to the integer value. */ + +EALIGN (__roundf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding mode toward 0. */ + lfs fp10,.LC1@toc(2) + ble- cr6,.L4 + fadds fp1,fp1,fp10 /* x+= 0.5; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + fsubs fp9,fp1,fp10 /* x+= 0.5; */ + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp9,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. 
*/ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S new file mode 100644 index 0000000000..890eb21c54 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_trunc.S @@ -0,0 +1,79 @@ +/* trunc function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .section ".toc","aw" +.LC0: /* 2**52 */ + .tc FD_43300000_0[TC],0x4330000000000000 + .section ".text" + +/* double [fp1] trunc (double x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**52 then + subtracting +-2**52. */ + +EALIGN (__trunc, 4, 0) + CALL_MCOUNT 0 + lfd fp13,.LC0@toc(2) + fabs fp0,fp1 + fsub fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. 
*/ + ble- cr6,.L4 + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsub fp1,fp1,fp13 /* x-= TWO52; */ + fadd fp1,fp1,fp13 /* x+= TWO52; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadd fp1,fp1,fp1 + blr + END (__trunc) + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +weak_alias (__trunc, truncl) +strong_alias (__trunc, __truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S new file mode 100644 index 0000000000..cfcff80bf7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/fpu/s_truncf.S @@ -0,0 +1,73 @@ +/* truncf function. PowerPC64 version. + Copyright (C) 2004-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .section ".toc","aw" + .p2align 3 +.LC0: /* 2**23 */ + .long 0x4b000000 + .long 0x0 + .section ".text" + +/* float [fp1] truncf (float x [fp1]) + IEEE 1003.1 trunc function. IEEE specifies "trunc to the integer + value, in floating format, nearest to but no larger in magnitude + then the argument." + We set "round toward Zero" mode and trunc by adding +-2**23 then + subtracting +-2**23. */ + +EALIGN (__truncf, 4, 0) + CALL_MCOUNT 0 + lfs fp13,.LC0@toc(2) + fabs fp0,fp1 + fsubs fp12,fp13,fp13 /* generate 0.0 */ + fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO23) */ + mffs fp11 /* Save current FPU rounding mode and + "inexact" state. */ + fcmpu cr6,fp1,fp12 /* if (x > 0.0) */ + bnl- cr7,.L10 + mtfsfi 7,1 /* Set rounding toward 0 mode. */ + ble- cr6,.L4 + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fabs fp1,fp1 /* if (x == 0.0) */ + /* x = 0.0; */ + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L4: + bge- cr6,.L9 /* if (x < 0.0) */ + fsubs fp1,fp1,fp13 /* x-= TWO23; */ + fadds fp1,fp1,fp13 /* x+= TWO23; */ + fnabs fp1,fp1 /* if (x == 0.0) */ + /* x = -0.0; */ +.L9: + mtfsf 0xff,fp11 /* Restore previous rounding mode and + "inexact" state. */ + blr +.L10: + /* Ensure sNaN input is converted to qNaN. */ + fcmpu cr7,fp1,fp1 + beqlr cr7 + fadds fp1,fp1,fp1 + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h b/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h new file mode 100644 index 0000000000..33064c6781 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/hp-timing.h @@ -0,0 +1,46 @@ +/* High precision, low overhead timing functions. powerpc64 version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +/* We always assume having the timestamp register. */ +#define HP_TIMING_AVAIL (1) +#define HP_SMALL_TIMING_AVAIL (1) + +/* We indeed have inlined functions. */ +#define HP_TIMING_INLINE (1) + +/* We use 64bit values for the times. */ +typedef unsigned long long int hp_timing_t; + +/* That's quite simple. Use the `mftb' instruction. Note that the value + might not be 100% accurate since there might be some more instructions + running in this moment. This could be changed by using a barrier like + 'lwsync' right before the `mftb' instruction. But we are not interested + in accurate clock cycles here so we don't do this. */ +#ifdef _ARCH_PWR4 +#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mfspr %0,268" : "=r" (Var)) +#else +#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mftb %0" : "=r" (Var)) +#endif + +#include <hp-timing-common.h> + +#endif /* hp-timing.h */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S b/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S new file mode 100644 index 0000000000..480e38688b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/lshift.S @@ -0,0 +1,177 @@ +/* PowerPC64 mpn_lshift -- rp[] = up[] << cnt + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RP r3 +#define UP r4 +#define N r5 +#define CNT r6 + +#define TNC r0 +#define U0 r30 +#define U1 r31 +#define RETVAL r5 + +EALIGN(__mpn_lshift, 5, 0) + std U1, -8(r1) + std U0, -16(r1) + subfic TNC, CNT, 64 + sldi r7, N, RP + add UP, UP, r7 + add RP, RP, r7 + rldicl. U0, N, 0, 62 + cmpdi CNT, U0, 2 + addi U1, N, RP + ld r10, -8(UP) + srd RETVAL, r10, TNC + + srdi U1, U1, 2 + mtctr U1 + beq cr0, L(b00) + blt cr6, L(b01) + ld r11, -16(UP) + beq cr6, L(b10) + + .align 4 +L(b11): sld r8, r10, CNT + srd r9, r11, TNC + ld U1, -24(UP) + addi UP, UP, -24 + sld r12, r11, CNT + srd r7, U1, TNC + addi RP, RP, 16 + bdnz L(gt3) + + or r11, r8, r9 + sld r8, U1, CNT + b L(cj3) + + .align 4 +L(gt3): ld U0, -8(UP) + or r11, r8, r9 + sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -16(UP) + or r10, r12, r7 + b L(L11) + + .align 5 +L(b10): sld r12, r10, CNT + addi RP, RP, 24 + srd r7, r11, TNC + bdnz L(gt2) + + sld r8, r11, CNT + or r10, r12, r7 + b L(cj2) + +L(gt2): ld U0, -24(UP) + sld r8, r11, CNT + srd r9, U0, TNC + ld U1, -32(UP) + or r10, r12, r7 + sld r12, U0, CNT + srd r7, U1, 0 + ld U0, -40(UP) + or r11, r8, r9 + addi UP, UP, -16 + b L(L10) + + .align 4 +L(b00): ld U1, -16(UP) + sld r12, r10, CNT + srd r7, U1, TNC + ld U0, -24(UP) + sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -32(UP) 
+ or r10, r12, r7 + sld r12, U0, CNT + srd r7, U1, TNC + addi RP, RP, r8 + bdz L(cj4) + +L(gt4): addi UP, UP, -32 + ld U0, -8(UP) + or r11, r8, r9 + b L(L00) + + .align 4 +L(b01): bdnz L(gt1) + sld r8, r10, CNT + std r8, -8(RP) + b L(ret) + +L(gt1): ld U0, -16(UP) + sld r8, r10, CNT + srd r9, U0, TNC + ld U1, -24(UP) + sld r12, U0, CNT + srd r7, U1, TNC + ld U0, -32(UP) + or r11, r8, r9 + sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -40(UP) + addi UP, UP, -40 + or r10, r12, r7 + bdz L(end) + + .align 5 +L(top): sld r12, U0, CNT + srd r7, U1, TNC + ld U0, -8(UP) + std r11, -8(RP) + or r11, r8, r9 +L(L00): sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -16(UP) + std r10, -16(RP) + or r10, r12, r7 +L(L11): sld r12, U0, CNT + srd r7, U1, TNC + ld U0, -24(UP) + std r11, -24(RP) + or r11, r8, r9 +L(L10): sld r8, U1, CNT + srd r9, U0, TNC + ld U1, -32(UP) + addi UP, UP, -32 + std r10, -32(RP) + addi RP, RP, -32 + or r10, r12, r7 + bdnz L(top) + + .align 5 +L(end): sld r12, U0, CNT + srd r7, U1, TNC + std r11, -8(RP) +L(cj4): or r11, r8, r9 + sld r8, U1, CNT + std r10, -16(RP) +L(cj3): or r10, r12, r7 + std r11, -24(RP) +L(cj2): std r10, -32(RP) + std r8, -40(RP) + +L(ret): ld U1, -8(r1) + ld U0, -16(r1) + mr RP, RETVAL + blr +END(__mpn_lshift) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S new file mode 100644 index 0000000000..a4c82c31ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/memcpy.S @@ -0,0 +1,397 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job + of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is + possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ + clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + cmpld cr6,10,11 + mr 12,4 + srdi 9,5,3 /* Number of full double words remaining. */ + mtcrf 0x01,0 + mr 31,5 + beq .L0 + + subf 31,0,5 + /* Move 0-7 bytes as needed to get the destination doubleword aligned. 
*/ +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* check alignment of src again. */ + srdi 9,31,3 /* Number of full double words remaining. */ + + /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ +.L0: + clrldi 11,31,61 + mtcrf 0x01,9 + bne- cr6,.L6 /* If source is not DW aligned. */ + + /* Move doublewords where destination and source are DW aligned. + Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration. + If the copy is not an exact multiple of 32 bytes, 1-3 + doublewords are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-7 bytes. These byte are + copied a word/halfword/byte at a time as needed to preserve alignment. 
*/ + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 +2: + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr + +/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 64-byte, and + 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte + boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to insure that all loads are aligned. 
+ While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small delay. */ + + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmpldi cr1,5,16 + mr 10,5 + mr 12,4 + cmpldi cr6,0,2 + beq .L3 /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmpldi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +.L3: +/* At least 6 bytes left and the source is word aligned. */ + blt cr1,8f +16: /* Move 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 6,8(12) + stw 7,4(3) + lwz 7,12(12) + addi 12,12,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 +8: /* Move 8 bytes. */ + bf 28,4f + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Move 4 bytes. */ + bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + blr +1: /* Move 1 byte. */ + bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,4f +/* Would have liked to use use ld/std here but the 630 processors are + slow for load/store doubles that are not at least word aligned. 
+ Unaligned Load/Store word execute with only a 1 cycle penalty. */ + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +4: bf 29,2b + lwz 6,0(4) + stw 6,0(3) +6: + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) + .align 4 +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + + .align 4 +.L6: + + /* Copy doublewords where the destination is aligned but the source is + not. Use aligned doubleword loads from the source, shifted to realign + the data, to allow aligned destination stores. */ + subf 5,10,12 + andi. 0,9,1 + cmpldi cr6,11,0 + sldi 10,10,3 + mr 11,9 + mr 4,3 + ld 6,0(5) + ld 7,8(5) + subfic 9,10,64 + beq 2f +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 +#else + sld 0,6,10 +#endif + cmpldi 11,1 + mr 6,7 + addi 4,4,-8 + addi 11,11,-1 + b 1f +2: addi 5,5,8 + .align 4 +#ifdef __LITTLE_ENDIAN__ +0: srd 0,6,10 + sld 8,7,9 +#else +0: sld 0,6,10 + srd 8,7,9 +#endif + cmpldi 11,2 + ld 6,8(5) + or 0,0,8 + addi 11,11,-2 + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 +1: sld 8,6,9 +#else + sld 0,7,10 +1: srd 8,6,9 +#endif + or 0,0,8 + beq 8f + ld 7,16(5) + std 0,8(4) + addi 5,5,16 + addi 4,4,16 + b 0b + .align 4 +8: + std 0,8(4) + rldicr 0,31,0,60 + mtcrf 0x01,31 + bne cr6,.L9 /* If the tail is 0 bytes we are done! */ + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S new file mode 100644 index 0000000000..f6581b50f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/memset.S @@ -0,0 +1,265 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section ".toc","aw" +.LC0: + .tc __cache_line_size[TC],__cache_line_size + .section ".text" + .align 2 + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to doubleword boundary. 
*/ + cmpldi cr5, rLEN, 31 + insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srdi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrldi. 
rLEN, rLEN, 59 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + +L(c3): dcbtst rNEG64, rMEMP + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + cmpldi cr1, rLEN, 16 + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less the 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + ld rCLS,.LC0@toc(r2) + lwz rCLS,0(rCLS) +/* If the cache line size was not set just goto to L(nondcbz) which is + safe for any cache line size. */ + cmpldi cr1,rCLS,0 + beq cr1,L(nondcbz) + + +/* Now we know the cache line size, and it is not 32-bytes, but + we may not yet be aligned to the cache line. May have a partial + line to fill, so touch it 1st. */ + dcbt 0,rMEMP + addi rCLM,rCLS,-1 +L(getCacheAligned): + cmpldi cr1,rLEN,32 + and. rTMP,rCLM,rMEMP + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,-32(rMEMP) + std rCHR,-24(rMEMP) + std rCHR,-16(rMEMP) + std rCHR,-8(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. */ +L(cacheAligned): + cmpld cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and was not 32-bytes + and the remainder (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 8 bytes or less. 
*/ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +#ifndef NO_BZERO_IMPL +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END_GEN_TB (__bzero,TB_TOCLESS) + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S new file mode 100644 index 0000000000..68e85cbdc8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/mul_1.S @@ -0,0 +1,135 @@ +/* PowerPC64 __mpn_mul_1 -- Multiply a limb vector with a limb and store + the result in a second limb vector. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RP r3 +#define UP r4 +#define N r5 +#define VL r6 + +EALIGN(__mpn_mul_1, 5, 0) + std r27, -40(r1) + std r26, -48(r1) + li r12, 0 + ld r26, 0(UP) + + rldicl. r0, N, 0, 62 + cmpdi VL, r0, 2 + addic N, N, RP + srdi N, N, 2 + mtctr N + beq cr0, L(b00) + blt cr6, L(b01) + beq cr6, L(b10) + +L(b11): mr cr7, r12 + mulld cr0, r26, VL + mulhdu r12, r26, VL + addi UP, UP, 8 + addc r0, r0, r7 + std r0, 0(RP) + addi RP, RP, 8 + b L(fic) + +L(b00): ld r27, r8(UP) + addi UP, UP, 16 + mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + addc r0, r0, r12 + adde r7, r7, N + addze r12, r8 + std r0, 0(RP) + std r7, 8(RP) + addi RP, RP, 16 + b L(fic) + + nop +L(b01): bdnz L(gt1) + mulld r0, r26, VL + mulhdu r8, r26, VL + addc r0, r0, r12 + std r0, 0(RP) + b L(ret) +L(gt1): ld r27, 8(UP) + nop + mulld r0, r26, VL + mulhdu N, r26, VL + ld r26, 16(UP) + mulld r7, r27, VL + mulhdu r8, r27, VL + mulld r9, r26, VL + mulhdu r10, r26, VL + addc r0, r0, r12 + adde r7, r7, N + adde r9, r9, r8 + addze r12, r10 + std r0, 0(RP) + std r7, 8(RP) + std r9, 16(RP) + addi UP, UP, 24 + addi RP, RP, 24 + b L(fic) + + nop +L(fic): ld r26, 0(UP) +L(b10): ld r27, 8(UP) + addi UP, UP, 16 + bdz L(end) + +L(top): mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, 
r27, VL + ld r26, 0(UP) + ld r27, 8(UP) + adde r0, r0, r12 + adde r7, r7, N + mulld r9, r26, VL + mulhdu r10, r26, VL + mulld r11, r27, VL + mulhdu r12, r27, VL + ld r26, 16(UP) + ld r27, 24(UP) + std r0, 0(RP) + adde r9, r9, r8 + std r7, 8(RP) + adde r11, r11, r10 + std r9, 16(RP) + addi UP, UP, 32 + std r11, 24(RP) + + addi RP, RP, 32 + bdnz L(top) + +L(end): mulld r0, r26, VL + mulhdu N, r26, VL + mulld r7, r27, VL + mulhdu r8, r27, VL + adde r0, r0, r12 + adde r7, r7, N + std r0, 0(RP) + std r7, 8(RP) +L(ret): addze RP, r8 + ld r27, -40(r1) + ld r26, -48(r1) + blr +END(__mpn_mul_1) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile new file mode 100644 index 0000000000..5da9052993 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -0,0 +1,47 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ + memcpy-power4 memcpy-ppc64 \ + memcmp-power8 memcmp-power7 memcmp-power4 memcmp-ppc64 \ + memset-power7 memset-power6 memset-power4 \ + memset-ppc64 memset-power8 \ + mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ + memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ + rawmemchr-ppc64 strlen-power7 strlen-ppc64 \ + strnlen-power8 strnlen-power7 strnlen-ppc64 \ + strcasecmp-power7 strcasecmp_l-power7 \ + strncase-power7 strncase_l-power7 \ + strncmp-power9 strncmp-power8 strncmp-power7 \ + strncmp-power4 strncmp-ppc64 \ + strchr-power8 strchr-power7 strchr-ppc64 \ + strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \ + strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \ + stpcpy-power7 stpcpy-ppc64 \ + strrchr-power8 strrchr-power7 strrchr-ppc64 \ + strncat-power8 strncat-power7 strncat-ppc64 \ + strncpy-power7 strncpy-ppc64 \ + stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \ + strcmp-power9 strcmp-power8 strcmp-power7 strcmp-ppc64 \ + strcat-power8 strcat-power7 strcat-ppc64 \ + memmove-power7 memmove-ppc64 
wordcopy-ppc64 bcopy-ppc64 \ + strncpy-power8 strstr-power7 strstr-ppc64 \ + strspn-power8 strspn-ppc64 strcspn-power8 strcspn-ppc64 \ + strlen-power8 strcasestr-power8 strcasestr-ppc64 \ + strcasecmp-ppc64 strcasecmp-power8 strncase-ppc64 \ + strncase-power8 + +CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops +CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops +endif + +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcschr-power7 wcschr-power6 wcschr-ppc64 \ + wcsrchr-power7 wcsrchr-power6 wcsrchr-ppc64 \ + wcscpy-power7 wcscpy-power6 wcscpy-ppc64 \ + +CFLAGS-wcschr-power7.c += -mcpu=power7 +CFLAGS-wcschr-power6.c += -mcpu=power6 +CFLAGS-wcsrchr-power7.c += -mcpu=power7 +CFLAGS-wcsrchr-power6.c += -mcpu=power6 +CFLAGS-wcscpy-power7.c += -mcpu=power7 +CFLAGS-wcscpy-power6.c += -mcpu=power6 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c new file mode 100644 index 0000000000..a8a097a614 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy-ppc64.c @@ -0,0 +1,27 @@ +/* PowerPC64 default bcopy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; +extern __typeof (memmove) __memmove_ppc attribute_hidden; + +void __bcopy_ppc (const void *src, void *dest, size_t n) +{ + __memmove_ppc (dest, src, n); +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c new file mode 100644 index 0000000000..05d46e2b48 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bcopy.c @@ -0,0 +1,29 @@ +/* PowerPC64 multiarch bcopy. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include "init-arch.h" + +extern __typeof (bcopy) __bcopy_ppc attribute_hidden; +/* __bcopy_power7 symbol is implemented at memmove-power7.S */ +extern __typeof (bcopy) __bcopy_power7 attribute_hidden; + +libc_ifunc (bcopy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bcopy_power7 + : __bcopy_ppc); diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c new file mode 100644 index 0000000000..83b224b8d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/bzero.c @@ -0,0 +1,43 @@ +/* Multiple versions of bzero. PowerPC64 version. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# include <string.h> +# include <strings.h> +# include "init-arch.h" + +extern __typeof (bzero) __bzero_ppc attribute_hidden; +extern __typeof (bzero) __bzero_power4 attribute_hidden; +extern __typeof (bzero) __bzero_power6 attribute_hidden; +extern __typeof (bzero) __bzero_power7 attribute_hidden; +extern __typeof (bzero) __bzero_power8 attribute_hidden; + +libc_ifunc (__bzero, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __bzero_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __bzero_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __bzero_power6 : + (hwcap & PPC_FEATURE_POWER4) + ? __bzero_power4 + : __bzero_ppc); + +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..eb173f8b05 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -0,0 +1,389 @@ +/* Enumerate available IFUNC implementations of a function. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <ifunc-impl-list.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 6 + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); + + size_t i = 0; + + unsigned long int hwcap = GLRO(dl_hwcap); + unsigned long int hwcap2 = GLRO(dl_hwcap2); + + /* hwcap contains only the latest supported ISA, the code checks which is + and fills the previous supported ones. */ + if (hwcap & PPC_FEATURE_ARCH_2_06) + hwcap |= PPC_FEATURE_ARCH_2_05 | PPC_FEATURE_POWER5_PLUS | + PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_ARCH_2_05) + hwcap |= PPC_FEATURE_POWER5_PLUS | PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5_PLUS) + hwcap |= PPC_FEATURE_POWER5 | PPC_FEATURE_POWER4; + else if (hwcap & PPC_FEATURE_POWER5) + hwcap |= PPC_FEATURE_POWER4; + +#ifdef SHARED + /* Support sysdeps/powerpc/powerpc64/multiarch/memcpy.c. 
*/ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX, + __memcpy_power7) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_06, + __memcpy_a2) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_ARCH_2_05, + __memcpy_power6) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_CELL_BE, + __memcpy_cell) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_POWER4, + __memcpy_power4) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memmove.c. */ + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX, + __memmove_power7) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memset.c. */ + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memset_power8) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_HAS_VSX, + __memset_power7) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_ARCH_2_05, + __memset_power6) + IFUNC_IMPL_ADD (array, i, memset, hwcap & PPC_FEATURE_POWER4, + __memset_power4) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcpy.c. */ + IFUNC_IMPL (i, name, strcpy, + IFUNC_IMPL_ADD (array, i, strcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcpy_power8) + IFUNC_IMPL_ADD (array, i, strcpy, hwcap & PPC_FEATURE_HAS_VSX, + __strcpy_power7) + IFUNC_IMPL_ADD (array, i, strcpy, 1, + __strcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpcpy.c. */ + IFUNC_IMPL (i, name, stpcpy, + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __stpcpy_power8) + IFUNC_IMPL_ADD (array, i, stpcpy, hwcap & PPC_FEATURE_HAS_VSX, + __stpcpy_power7) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, + __stpcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strlen.c. 
*/ + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strlen_power8) + IFUNC_IMPL_ADD (array, i, strlen, hwcap & PPC_FEATURE_HAS_VSX, + __strlen_power7) + IFUNC_IMPL_ADD (array, i, strlen, 1, + __strlen_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */ + IFUNC_IMPL (i, name, strncmp, + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strncmp_power9) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncmp_power8) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_HAS_VSX, + __strncmp_power7) + IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4, + __strncmp_power4) + IFUNC_IMPL_ADD (array, i, strncmp, 1, + __strncmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strchr.c. */ + IFUNC_IMPL (i, name, strchr, + IFUNC_IMPL_ADD (array, i, strchr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strchr_power8) + IFUNC_IMPL_ADD (array, i, strchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strchr_power7) + IFUNC_IMPL_ADD (array, i, strchr, 1, + __strchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strchrnul.c. */ + IFUNC_IMPL (i, name, strchrnul, + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strchrnul_power8) + IFUNC_IMPL_ADD (array, i, strchrnul, + hwcap & PPC_FEATURE_HAS_VSX, + __strchrnul_power7) + IFUNC_IMPL_ADD (array, i, strchrnul, 1, + __strchrnul_ppc)) +#endif + + /* Support sysdeps/powerpc/powerpc64/multiarch/memcmp.c. */ + IFUNC_IMPL (i, name, memcmp, + IFUNC_IMPL_ADD (array, i, memcmp, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __memcmp_power8) + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_HAS_VSX, + __memcmp_power7) + IFUNC_IMPL_ADD (array, i, memcmp, hwcap & PPC_FEATURE_POWER4, + __memcmp_power4) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/bzero.c. 
*/ + IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap2 & PPC_FEATURE2_ARCH_2_07, + __bzero_power8) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_HAS_VSX, + __bzero_power7) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_ARCH_2_05, + __bzero_power6) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & PPC_FEATURE_POWER4, + __bzero_power4) + IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/bcopy.c. */ + IFUNC_IMPL (i, name, bcopy, + IFUNC_IMPL_ADD (array, i, bcopy, hwcap & PPC_FEATURE_HAS_VSX, + __bcopy_power7) + IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/mempcpy.c. */ + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + hwcap & PPC_FEATURE_HAS_VSX, + __mempcpy_power7) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, + __mempcpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memchr.c. */ + IFUNC_IMPL (i, name, memchr, + IFUNC_IMPL_ADD (array, i, memchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memchr_power7) + IFUNC_IMPL_ADD (array, i, memchr, 1, + __memchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/memrchr.c. */ + IFUNC_IMPL (i, name, memrchr, + IFUNC_IMPL_ADD (array, i, memrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __memrchr_power7) + IFUNC_IMPL_ADD (array, i, memrchr, 1, + __memrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, + hwcap & PPC_FEATURE_HAS_VSX, + __rawmemchr_power7) + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, + __rawmemchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strnlen.c. 
*/ + IFUNC_IMPL (i, name, strnlen, + IFUNC_IMPL_ADD (array, i, strnlen, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strnlen_power8) + IFUNC_IMPL_ADD (array, i, strnlen, hwcap & PPC_FEATURE_HAS_VSX, + __strnlen_power7) + IFUNC_IMPL_ADD (array, i, strnlen, 1, + __strnlen_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */ + IFUNC_IMPL (i, name, strcasecmp, + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcasecmp_power8) + IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c. */ + IFUNC_IMPL (i, name, strcasecmp_l, + IFUNC_IMPL_ADD (array, i, strcasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strcasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, + __strcasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */ + IFUNC_IMPL (i, name, strncasecmp, + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncasecmp_power8) + IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncase_l.c. */ + IFUNC_IMPL (i, name, strncasecmp_l, + IFUNC_IMPL_ADD (array, i, strncasecmp_l, + hwcap & PPC_FEATURE_HAS_VSX, + __strncasecmp_l_power7) + IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, + __strncasecmp_l_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcschr.c. */ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcschr_power7) + IFUNC_IMPL_ADD (array, i, wcschr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcschr_power6) + IFUNC_IMPL_ADD (array, i, wcschr, 1, + __wcschr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcschr.c. 
*/ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __wcsrchr_power7) + IFUNC_IMPL_ADD (array, i, wcsrchr, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcsrchr_power6) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, + __wcsrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/wcscpy.c. */ + IFUNC_IMPL (i, name, wcscpy, + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_HAS_VSX, + __wcscpy_power7) + IFUNC_IMPL_ADD (array, i, wcscpy, + hwcap & PPC_FEATURE_ARCH_2_05, + __wcscpy_power6) + IFUNC_IMPL_ADD (array, i, wcscpy, 1, + __wcscpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strrchr.c. */ + IFUNC_IMPL (i, name, strrchr, + IFUNC_IMPL_ADD (array, i, strrchr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strrchr_power8) + IFUNC_IMPL_ADD (array, i, strrchr, + hwcap & PPC_FEATURE_HAS_VSX, + __strrchr_power7) + IFUNC_IMPL_ADD (array, i, strrchr, 1, + __strrchr_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncat.c. */ + IFUNC_IMPL (i, name, strncat, + IFUNC_IMPL_ADD (array, i, strncat, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncat_power8) + IFUNC_IMPL_ADD (array, i, strncat, + hwcap & PPC_FEATURE_HAS_VSX, + __strncat_power7) + IFUNC_IMPL_ADD (array, i, strncat, 1, + __strncat_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strncpy.c. */ + IFUNC_IMPL (i, name, strncpy, + IFUNC_IMPL_ADD (array, i, strncpy, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncpy_power8) + IFUNC_IMPL_ADD (array, i, strncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __strncpy_power7) + IFUNC_IMPL_ADD (array, i, strncpy, 1, + __strncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/stpncpy.c. */ + IFUNC_IMPL (i, name, stpncpy, + IFUNC_IMPL_ADD (array, i, stpncpy, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __stpncpy_power8) + IFUNC_IMPL_ADD (array, i, stpncpy, + hwcap & PPC_FEATURE_HAS_VSX, + __stpncpy_power7) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, + __stpncpy_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. 
*/ + IFUNC_IMPL (i, name, strcmp, + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_3_00, + __strcmp_power9) + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcmp_power8) + IFUNC_IMPL_ADD (array, i, strcmp, + hwcap & PPC_FEATURE_HAS_VSX, + __strcmp_power7) + IFUNC_IMPL_ADD (array, i, strcmp, 1, + __strcmp_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcat.c. */ + IFUNC_IMPL (i, name, strcat, + IFUNC_IMPL_ADD (array, i, strcat, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcat_power8) + IFUNC_IMPL_ADD (array, i, strcat, + hwcap & PPC_FEATURE_HAS_VSX, + __strcat_power7) + IFUNC_IMPL_ADD (array, i, strcat, 1, + __strcat_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strspn.c. */ + IFUNC_IMPL (i, name, strspn, + IFUNC_IMPL_ADD (array, i, strspn, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strspn_power8) + IFUNC_IMPL_ADD (array, i, strspn, 1, + __strspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c. */ + IFUNC_IMPL (i, name, strcspn, + IFUNC_IMPL_ADD (array, i, strcspn, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcspn_power8) + IFUNC_IMPL_ADD (array, i, strcspn, 1, + __strcspn_ppc)) + + /* Support sysdeps/powerpc/powerpc64/multiarch/strstr.c. */ + IFUNC_IMPL (i, name, strstr, + IFUNC_IMPL_ADD (array, i, strstr, + hwcap & PPC_FEATURE_HAS_VSX, + __strstr_power7) + IFUNC_IMPL_ADD (array, i, strstr, 1, + __strstr_ppc)) + + + /* Support sysdeps/powerpc/powerpc64/multiarch/strcasestr.c. 
*/ + IFUNC_IMPL (i, name, strcasestr, + IFUNC_IMPL_ADD (array, i, strcasestr, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcasestr_power8) + IFUNC_IMPL_ADD (array, i, strcasestr, 1, + __strcasestr_ppc)) + + return i; +} diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h new file mode 100644 index 0000000000..dbbe83c67c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/init-arch.h @@ -0,0 +1,18 @@ +/* This file is part of the GNU C Library. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/init-arch.h> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S new file mode 100644 index 0000000000..fedca9c997 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-power7.S @@ -0,0 +1,28 @@ +/* Optimized memchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCHR __memchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c new file mode 100644 index 0000000000..b67631f017 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr-ppc64.c @@ -0,0 +1,31 @@ +/* PowerPC64 default implementation of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define MEMCHR __memchr_ppc + +#undef weak_alias +#define weak_alias(a, b) + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) + +extern __typeof (memchr) __memchr_ppc attribute_hidden; + +#include <string/memchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c new file mode 100644 index 0000000000..f6f4babc09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memchr.c @@ -0,0 +1,38 @@ +/* Multiple versions of memchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memchr) __memchr_ppc attribute_hidden; +extern __typeof (__memchr) __memchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__memchr_power7 + : __memchr_ppc); + +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) +#else +#include <string/memchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S new file mode 100644 index 0000000000..e38b2a9c44 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power4.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power4/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S new file mode 100644 index 0000000000..a9cc979b92 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power7.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S new file mode 100644 index 0000000000..b7837035b1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-power8.S @@ -0,0 +1,28 @@ +/* Optimized memcmp implementation for PowerPC64/POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCMP __memcmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power8/memcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c new file mode 100644 index 0000000000..3bd035dc49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define MEMCMP __memcmp_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__memcmp_ppc) aliasname \ + __attribute__ ((weak, alias ("__memcmp_ppc"))); +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__memcmp_ppc, __GI_memcmp, __memcmp_ppc); +#endif + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; + +#include <string/memcmp.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c new file mode 100644 index 0000000000..0d315d5e70 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcmp.c @@ -0,0 +1,44 @@ +/* Multiple versions of memcmp. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. 
*/ +#if IS_IN (libc) +# define memcmp __redirect_memcmp +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (memcmp) __memcmp_ppc attribute_hidden; +extern __typeof (memcmp) __memcmp_power4 attribute_hidden; +extern __typeof (memcmp) __memcmp_power7 attribute_hidden; +extern __typeof (memcmp) __memcmp_power8 attribute_hidden; +# undef memcmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_memcmp, memcmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memcmp_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcmp_power7 + : (hwcap & PPC_FEATURE_POWER4) + ? __memcmp_power4 + : __memcmp_ppc); +#else +#include <string/memcmp.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S new file mode 100644 index 0000000000..a942287900 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-a2.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC A2. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define MEMCPY __memcpy_a2 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/a2/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S new file mode 100644 index 0000000000..39aa30c729 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-cell.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/CELL. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_cell + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/cell/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S new file mode 100644 index 0000000000..6e7fea382b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power4.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power4/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S new file mode 100644 index 0000000000..40bcdb1161 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power6.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power6 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power6/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S new file mode 100644 index 0000000000..222936af63 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-power7.S @@ -0,0 +1,26 @@ +/* Optimized memcpy implementation for PowerPC/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMCPY __memcpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S new file mode 100644 index 0000000000..2dc644c809 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S @@ -0,0 +1,28 @@ +/* Default memcpy implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define MEMCPY __memcpy_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <sysdeps/powerpc/powerpc64/memcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c new file mode 100644 index 0000000000..9f4286c4fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memcpy.c @@ -0,0 +1,55 @@ +/* Multiple versions of memcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memcpy before the initialization + happened. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memcpy so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memcpy +# define memcpy __redirect_memcpy +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memcpy) __libc_memcpy; + +extern __typeof (__redirect_memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power4 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_cell attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden; + +libc_ifunc (__libc_memcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memcpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_06) + ? __memcpy_a2 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memcpy_power6 : + (hwcap & PPC_FEATURE_CELL_BE) + ? __memcpy_cell : + (hwcap & PPC_FEATURE_POWER4) + ? __memcpy_power4 + : __memcpy_ppc); + +#undef memcpy +strong_alias (__libc_memcpy, memcpy); +libc_hidden_ver (__libc_memcpy, memcpy); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S new file mode 100644 index 0000000000..a9435fa654 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-power7.S @@ -0,0 +1,29 @@ +/* Optimized memmove implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMMOVE __memmove_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef bcopy +#define bcopy __bcopy_power7 + +#include <sysdeps/powerpc/powerpc64/power7/memmove.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c new file mode 100644 index 0000000000..80353c5332 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c @@ -0,0 +1,44 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> +#include <memcopy.h> + +extern __typeof (_wordcopy_fwd_aligned) _wordcopy_fwd_aligned_ppc; +extern __typeof (_wordcopy_fwd_dest_aligned) _wordcopy_fwd_dest_aligned_ppc; +extern __typeof (_wordcopy_bwd_aligned) _wordcopy_bwd_aligned_ppc; +extern __typeof (_wordcopy_bwd_dest_aligned) _wordcopy_bwd_dest_aligned_ppc; + +#define _wordcopy_fwd_aligned _wordcopy_fwd_aligned_ppc +#define _wordcopy_fwd_dest_aligned _wordcopy_fwd_dest_aligned_ppc +#define _wordcopy_bwd_aligned _wordcopy_bwd_aligned_ppc +#define _wordcopy_bwd_dest_aligned _wordcopy_bwd_dest_aligned_ppc + +extern __typeof (memmove) __memmove_ppc attribute_hidden; +#define MEMMOVE __memmove_ppc + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +#ifdef SHARED +# define memcpy __memcpy_ppc +#endif + +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <string/memmove.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c new file mode 100644 index 0000000000..db2bbc7837 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memmove.c @@ -0,0 +1,45 @@ +/* Multiple versions of memmove. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for the definition in lib and for + DSO. In static binaries we need memmove before the initialization + happened. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memmove so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef memmove +# define memmove __redirect_memmove +# include <string.h> +# include "init-arch.h" + +extern __typeof (__redirect_memmove) __libc_memmove; + +extern __typeof (__redirect_memmove) __memmove_ppc attribute_hidden; +extern __typeof (__redirect_memmove) __memmove_power7 attribute_hidden; + +libc_ifunc (__libc_memmove, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memmove_power7 + : __memmove_ppc); + +#undef memmove +strong_alias (__libc_memmove, memmove); +libc_hidden_ver (__libc_memmove, memmove); +#else +# include <string/memmove.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S new file mode 100644 index 0000000000..08f133644a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-power7.S @@ -0,0 +1,28 @@ +/* Optimized mempcpy implementation for PowerPC/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMPCPY __mempcpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power7/mempcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c new file mode 100644 index 0000000000..d0741fe318 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of mempcpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c new file mode 100644 index 0000000000..430557ee0a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/mempcpy.c @@ -0,0 +1,44 @@ +/* Multiple versions of mempcpy. 
+ Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# define NO_MEMPCPY_STPCPY_REDIRECT +/* Omit the mempcpy inline definitions because it would redefine mempcpy. */ +# define _HAVE_STRING_ARCH_mempcpy 1 +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__mempcpy) __mempcpy_ppc attribute_hidden; +extern __typeof (__mempcpy) __mempcpy_power7 attribute_hidden; +# undef mempcpy +# undef __mempcpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___mempcpy, __mempcpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __mempcpy_power7 + : __mempcpy_ppc); + +weak_alias (__mempcpy, mempcpy) +#else +# include <string/mempcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S new file mode 100644 index 0000000000..052aa732ba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-power7.S @@ -0,0 +1,28 @@ +/* Optimized memrchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMRCHR __memrchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name,alias) + +#include <sysdeps/powerpc/powerpc64/power7/memrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c new file mode 100644 index 0000000000..2fc706db71 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c new file mode 100644 index 0000000000..fb09fdf89c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memrchr.c @@ -0,0 +1,37 @@ +/* Multiple versions of memrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__memrchr) __memrchr_ppc attribute_hidden; +extern __typeof (__memrchr) __memrchr_power7 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__memrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__memrchr_power7 + : __memrchr_ppc); + +weak_alias (__memrchr, memrchr) +#else +#include <string/memrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S new file mode 100644 index 0000000000..3908e8e412 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER4. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power4 + +#include <sysdeps/powerpc/powerpc64/power4/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S new file mode 100644 index 0000000000..4ddbd2e274 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power6.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power6 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power6 + +#include <sysdeps/powerpc/powerpc64/power6/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S new file mode 100644 index 0000000000..97f686b35d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power7.S @@ -0,0 +1,28 @@ +/* Optimized memset implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MEMSET __memset_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power7 +#include <sysdeps/powerpc/powerpc64/power7/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S new file mode 100644 index 0000000000..ea303533f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-power8.S @@ -0,0 +1,29 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define MEMSET __memset_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#undef __bzero +#define __bzero __bzero_power8 + +#include <sysdeps/powerpc/powerpc64/power8/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S new file mode 100644 index 0000000000..0f16e21c61 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S @@ -0,0 +1,42 @@ +/* Default memset/bzero implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. NOTE: this code should be positioned + before ENTRY/END_GEN_TB redefinition. */ +ENTRY (__bzero_ppc) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END_GEN_TB (__bzero_ppc,TB_TOCLESS) + + +#if defined SHARED && IS_IN (libc) +# define MEMSET __memset_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +/* Do not implement __bzero at powerpc64/memset.S. 
*/ +#define NO_BZERO_IMPL + +#include <sysdeps/powerpc/powerpc64/memset.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c new file mode 100644 index 0000000000..a5d9b3e60e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/memset.c @@ -0,0 +1,53 @@ +/* Multiple versions of memset. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +/* Redefine memset so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. 
*/ +# undef memset +# define memset __redirect_memset +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_memset) __libc_memset; + +extern __typeof (__redirect_memset) __memset_ppc attribute_hidden; +extern __typeof (__redirect_memset) __memset_power4 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power6 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power7 attribute_hidden; +extern __typeof (__redirect_memset) __memset_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__libc_memset, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __memset_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __memset_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __memset_power6 : + (hwcap & PPC_FEATURE_POWER4) + ? __memset_power4 + : __memset_ppc); + +#undef memset +strong_alias (__libc_memset, memset); +libc_hidden_ver (__libc_memset, memset); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S new file mode 100644 index 0000000000..d79d72820c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-power7.S @@ -0,0 +1,23 @@ +/* Optimized rawmemchr implementation for PowerPC64/POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define RAWMEMCHR __rawmemchr_power7 + +#include <sysdeps/powerpc/powerpc64/power7/rawmemchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c new file mode 100644 index 0000000000..cb55dbcc50 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c new file mode 100644 index 0000000000..8bfd58dd47 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c @@ -0,0 +1,39 @@ +/* Multiple versions of rawmemchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define __rawmemchr __redirect___rawmemchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__rawmemchr) __rawmemchr_ppc attribute_hidden; +extern __typeof (__rawmemchr) __rawmemchr_power7 attribute_hidden; +# undef __rawmemchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect___rawmemchr, __rawmemchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __rawmemchr_power7 + : __rawmemchr_ppc); + +weak_alias (__rawmemchr, rawmemchr) +#else +#include <string/rawmemchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c new file mode 100644 index 0000000000..7fb4b733e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-memset.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc64/rtld-memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S new file mode 100644 index 0000000000..16ba7264c4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/rtld-strchr.S @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc64/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c new file mode 100644 index 0000000000..e4b9ce9b6f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c @@ -0,0 +1,36 @@ +/* Multiarch stpcpy for POWER7/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (stpcpy) __stpcpy_power7 attribute_hidden; + +#define STPCPY __stpcpy_power7 +#define memcpy __memcpy_power7 +#define strlen __strlen_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef weak_alias +#define weak_alias(name, alias) +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/stpcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S new file mode 100644 index 0000000000..935347115a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-power8.S @@ -0,0 +1,26 @@ +/* Optimized stpcpy implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STPCPY __stpcpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/stpcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c new file mode 100644 index 0000000000..b5a3b12c05 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c @@ -0,0 +1,37 @@ +/* Multiarch stpcpy for PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (stpcpy) __stpcpy_ppc attribute_hidden; + +#define STPCPY __stpcpy_ppc +#define memcpy __memcpy_ppc +#define strlen __strlen_ppc + +#undef weak_alias +#define weak_alias(name, aliasname) + +#undef libc_hidden_def +#define libc_hidden_def(name) +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/stpcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c new file mode 100644 index 0000000000..3e34e3cafe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpcpy.c @@ -0,0 +1,41 @@ +/* Multiple versions of stpcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined SHARED && IS_IN (libc) +# define NO_MEMPCPY_STPCPY_REDIRECT +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__stpcpy) __stpcpy_ppc attribute_hidden; +extern __typeof (__stpcpy) __stpcpy_power7 attribute_hidden; +extern __typeof (__stpcpy) __stpcpy_power8 attribute_hidden; + +libc_ifunc_hidden (__stpcpy, __stpcpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __stpcpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __stpcpy_power7 + : __stpcpy_ppc); + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_def (stpcpy) +#else +# include <string/stpcpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S new file mode 100644 index 0000000000..6636b01d07 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power7.S @@ -0,0 +1,30 @@ +/* Optimized stpncpy implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define USE_AS_STPNCPY + +#define STPNCPY __stpncpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/stpncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S new file mode 100644 index 0000000000..6ce706a879 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-power8.S @@ -0,0 +1,28 @@ +/* Optimized stpncpy implementation for POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define USE_AS_STPNCPY + +#define STPNCPY __stpncpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/stpncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c new file mode 100644 index 0000000000..22186166a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy-ppc64.c @@ -0,0 +1,26 @@ +/* Default stpncpy implementation for PowerPC64. + Copyright (C) 2014-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STPNCPY __stpncpy_ppc +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) \ + __hidden_ver1 (__stpncpy_ppc, __GI___stpncpy, __stpncpy_ppc); +#endif + +#include <string/stpncpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c new file mode 100644 index 0000000000..e9b37dcc9a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/stpncpy.c @@ -0,0 +1,39 @@ +/* Multiple versions of stpncpy. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define stpncpy __redirect_stpncpy +# define __stpncpy __redirect___stpncpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__stpncpy) __stpncpy_ppc attribute_hidden; +extern __typeof (__stpncpy) __stpncpy_power7 attribute_hidden; +extern __typeof (__stpncpy) __stpncpy_power8 attribute_hidden; +# undef stpncpy +# undef __stpncpy + +libc_ifunc_redirected (__redirect___stpncpy, __stpncpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __stpncpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __stpncpy_power7 + : __stpncpy_ppc); +weak_alias (__stpncpy, stpncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S new file mode 100644 index 0000000000..025c5a9f13 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S @@ -0,0 +1,28 @@ +/* Optimized strcasecmp implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_power7 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S new file mode 100644 index 0000000000..9b62476e09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S @@ -0,0 +1,28 @@ +/* Optimized strcasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c new file mode 100644 index 0000000000..cbf91755da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strcasecmp for PPC64. 
+ Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strcasecmp __strcasecmp_ppc + +#include <string/strcasecmp.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c new file mode 100644 index 0000000000..dcb4ef4125 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcasecmp + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__strcasecmp) __libc_strcasecmp; + +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden; + +libc_ifunc (__libc_strcasecmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_power7 + : __strcasecmp_ppc); + +weak_alias (__libc_strcasecmp, strcasecmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S new file mode 100644 index 0000000000..da4c4054c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l-power7.S @@ -0,0 +1,31 @@ +/* Optimized strcasecmp_l implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_l_power7 + +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define USE_IN_EXTENDED_LOCALE_MODEL + +#include <sysdeps/powerpc/powerpc64/power7/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c new file mode 100644 index 0000000000..10b8f2e84d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c @@ -0,0 +1,40 @@ +/* Multiple versions of strcasecmp_l. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# define strcasecmp_l __strcasecmp_l_ppc +extern __typeof (__strcasecmp_l) __strcasecmp_l_ppc attribute_hidden; +extern __typeof (__strcasecmp_l) __strcasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strcasecmp_l.c> +#undef strcasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasecmp_l) __libc_strcasecmp_l; +libc_ifunc (__libc_strcasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strcasecmp_l_power7 + : __strcasecmp_l_ppc); + +weak_alias (__libc_strcasecmp_l, strcasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S new file mode 100644 index 0000000000..2cfb5ae77a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-power8.S @@ -0,0 +1,35 @@ +/* Optimized strcasestr implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCASESTR __strcasestr_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +/* The following definitions are used in strcasestr optimization. */ + +/* strlen is used to calculate len of r4. */ +#define STRLEN __strlen_power8 +/* strnlen is used to check if len of r3 is more than r4. */ +#define STRNLEN __strnlen_power7 +/* strchr is used to check if first char of r4 is present in r3. 
*/ +#define STRCHR __strchr_power8 + +#include <sysdeps/powerpc/powerpc64/power8/strcasestr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c new file mode 100644 index 0000000000..61f278f697 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr-ppc64.c @@ -0,0 +1,34 @@ +/* PowerPC64 default implementation of strcasestr. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCASESTR __strcasestr_ppc +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strcasestr_ppc, __GI_strcasestr, __strcasestr_ppc); +#endif + + +#undef weak_alias +#define weak_alias(a,b) + +extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; + +#include <string/strcasestr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c new file mode 100644 index 0000000000..9e6a16d6a9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcasestr.c @@ -0,0 +1,37 @@ +/* Multiple versions of strcasestr. + Copyright (C) 2016-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strcasestr) __strcasestr_ppc attribute_hidden; +extern __typeof (__strcasestr) __strcasestr_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strcasestr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcasestr_power8 + : __strcasestr_ppc); + +weak_alias (__strcasestr, strcasestr) +#else +#include <string/strcasestr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c new file mode 100644 index 0000000000..22d2caaec3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power7.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCAT __strcat_power7 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +extern typeof (strcpy) __strcpy_power7; +extern typeof (strlen) __strlen_power7; + +#define strcpy __strcpy_power7 +#define strlen __strlen_power7 +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c new file mode 100644 index 0000000000..f138beec67 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-power8.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define STRCAT __strcat_power8 + +#undef libc_hidden_def +#define libc_hidden_def(name) + +extern typeof (strcpy) __strcpy_power8; +extern typeof (strlen) __strlen_power8; + +#define strcpy __strcpy_power8 +#define strlen __strlen_power8 +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c new file mode 100644 index 0000000000..5049fc03f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCAT __strcat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcat_ppc, __GI_strcat, __strcat_ppc); +#endif + +extern __typeof (strcat) __strcat_ppc attribute_hidden; + +#include <string/strcat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c new file mode 100644 index 0000000000..3336aedcec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcat.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcat. PowerPC64 version. 
+ Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strcat __redirect_strcat +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcat) __strcat_ppc attribute_hidden; +extern __typeof (strcat) __strcat_power7 attribute_hidden; +extern __typeof (strcat) __strcat_power8 attribute_hidden; +# undef strcat + +libc_ifunc_redirected (__redirect_strcat, strcat, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcat_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcat_power7 + : __strcat_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S new file mode 100644 index 0000000000..e64c0b7c82 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power7.S @@ -0,0 +1,26 @@ +/* Optimized strchr implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHR __strchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S new file mode 100644 index 0000000000..bbda7b0505 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-power8.S @@ -0,0 +1,26 @@ +/* Optimized strchr implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRCHR __strchr_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S new file mode 100644 index 0000000000..769f9f07d4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr-ppc64.S @@ -0,0 +1,29 @@ +/* PowerPC64 default implementation of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef SHARED +# define STRCHR __strchr_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strchr; __GI_strchr = __strchr_ppc +#endif + +#include <sysdeps/powerpc/powerpc64/strchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c new file mode 100644 index 0000000000..573105818f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchr.c @@ -0,0 +1,42 @@ +/* Multiple versions of strchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strchr __redirect_strchr +/* Omit the strchr inline definitions because it would redefine strchr. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strchr) __strchr_ppc attribute_hidden; +extern __typeof (strchr) __strchr_power7 attribute_hidden; +extern __typeof (strchr) __strchr_power8 attribute_hidden; +# undef strchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strchr, strchr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strchr_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchr_power7 + : __strchr_ppc); +weak_alias (strchr, index) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S new file mode 100644 index 0000000000..c8e28721fd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power7.S @@ -0,0 +1,26 @@ +/* Optimized strchrnul implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCHRNUL __strchrnul_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S new file mode 100644 index 0000000000..1cd39fc1b3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-power8.S @@ -0,0 +1,26 @@ +/* Optimized strchrnul implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRCHRNUL __strchrnul_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strchrnul.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c new file mode 100644 index 0000000000..8d313c3e1d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul-ppc64.c @@ -0,0 +1,19 @@ +/* PowerPC64 default implementation of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c new file mode 100644 index 0000000000..1e9018f88a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strchrnul.c @@ -0,0 +1,40 @@ +/* Multiple versions of strchrnul. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strchrnul) __strchrnul_ppc attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power7 attribute_hidden; +extern __typeof (__strchrnul) __strchrnul_power8 attribute_hidden; + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc (__strchrnul, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strchrnul_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strchrnul_power7 + : __strchrnul_ppc); + +weak_alias (__strchrnul, strchrnul) +#else +#include <string/strchrnul.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S new file mode 100644 index 0000000000..82d1b63af9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power7.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCMP __strcmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S new file mode 100644 index 0000000000..b2464a8018 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power8.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRCMP __strcmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S new file mode 100644 index 0000000000..48ea05d2c5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-power9.S @@ -0,0 +1,26 @@ +/* Optimized strcmp implementation for POWER9/PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRCMP __strcmp_power9 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power9/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S new file mode 100644 index 0000000000..085e74758f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S @@ -0,0 +1,29 @@ +/* Default strcmp implementation for PowerPC64. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRCMP __strcmp_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strcmp; __GI_strcmp = __strcmp_ppc +#endif /* SHARED && IS_IN */ + +#include <sysdeps/powerpc/powerpc64/strcmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c new file mode 100644 index 0000000000..fc10205b00 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcmp.c @@ -0,0 +1,42 @@ +/* Multiple versions of strcmp. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +# define strcmp __redirect_strcmp +/* Omit the strcmp inline definitions because it would redefine strcmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcmp) __strcmp_ppc attribute_hidden; +extern __typeof (strcmp) __strcmp_power7 attribute_hidden; +extern __typeof (strcmp) __strcmp_power8 attribute_hidden; +extern __typeof (strcmp) __strcmp_power9 attribute_hidden; + +# undef strcmp + +libc_ifunc_redirected (__redirect_strcmp, strcmp, + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strcmp_power9 : + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcmp_power7 + : __strcmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c new file mode 100644 index 0000000000..892a551183 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c @@ -0,0 +1,32 @@ +/* Multiarch strcpy for POWER7/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (strcpy) __strcpy_power7 attribute_hidden; + +#define STRCPY __strcpy_power7 +#define memcpy __memcpy_power7 +#define strlen __strlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <string/strcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S new file mode 100644 index 0000000000..6c753b5d1a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-power8.S @@ -0,0 +1,26 @@ +/* Optimized strcpy implementation for POWER8/PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRCPY __strcpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c new file mode 100644 index 0000000000..cd6dd09541 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c @@ -0,0 +1,35 @@ +/* Multiarch strcpy for PPC64. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#if defined SHARED && IS_IN (libc) +extern __typeof (memcpy) __memcpy_ppc attribute_hidden; +extern __typeof (strlen) __strlen_ppc attribute_hidden; +extern __typeof (strcpy) __strcpy_ppc attribute_hidden; + +# define STRCPY __strcpy_ppc +# define memcpy __memcpy_ppc +# define strlen __strlen_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcpy_ppc, __GI_strcpy, __strcpy_ppc); +#endif + +#include <string/strcpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c new file mode 100644 index 0000000000..0da53e30b0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of strcpy. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined SHARED && IS_IN (libc) +# define strcpy __redirect_strcpy +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strcpy) __strcpy_ppc attribute_hidden; +extern __typeof (strcpy) __strcpy_power7 attribute_hidden; +extern __typeof (strcpy) __strcpy_power8 attribute_hidden; +#undef strcpy + +libc_ifunc_redirected (__redirect_strcpy, strcpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcpy_power7 + : __strcpy_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S new file mode 100644 index 0000000000..39b4cd8239 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-power8.S @@ -0,0 +1,25 @@ +/* Optimized strcspn implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRSPN __strcspn_power8 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c new file mode 100644 index 0000000000..96396af125 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c @@ -0,0 +1,26 @@ +/* Default strcspn implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STRCSPN __strcspn_ppc + +#ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) +#endif + +#include <string/strcspn.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c new file mode 100644 index 0000000000..a6df885181 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strcspn.c @@ -0,0 +1,35 @@ +/* Multiple versions of strcspn. PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +#undef strcspn +extern __typeof (strcspn) __libc_strcspn; + +extern __typeof (strcspn) __strcspn_ppc attribute_hidden; +extern __typeof (strcspn) __strcspn_power8 attribute_hidden; + +libc_ifunc (__libc_strcspn, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcspn_power8 + : __strcspn_ppc); + +weak_alias (__libc_strcspn, strcspn) +libc_hidden_builtin_def (strcspn) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S new file mode 100644 index 0000000000..333496efa5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power7.S @@ -0,0 +1,26 @@ +/* Optimized strlen implementation for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRLEN __strlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S new file mode 100644 index 0000000000..b4deea5f93 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-power8.S @@ -0,0 +1,26 @@ +/* Optimized strlen implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRLEN __strlen_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S new file mode 100644 index 0000000000..13231b8c64 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S @@ -0,0 +1,28 @@ +/* Default strlen implementation for PowerPC64. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRLEN __strlen_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) +#endif + +#include <sysdeps/powerpc/powerpc64/strlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c new file mode 100644 index 0000000000..a5a7b59558 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strlen.c @@ -0,0 +1,44 @@ +/* Multiple versions of strlen. PowerPC64 version. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined SHARED && IS_IN (libc) +/* Redefine strlen so that the compiler won't complain about the type + mismatch with the IFUNC selector in strong_alias, below. */ +# undef strlen +# define strlen __redirect_strlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__redirect_strlen) __libc_strlen; + +extern __typeof (__redirect_strlen) __strlen_ppc attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_power7 attribute_hidden; +extern __typeof (__redirect_strlen) __strlen_power8 attribute_hidden; + +libc_ifunc (__libc_strlen, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strlen_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strlen_power7 + : __strlen_ppc); + +#undef strlen +strong_alias (__libc_strlen, strlen) +libc_hidden_ver (__libc_strlen, strlen) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c new file mode 100644 index 0000000000..177da4a2f0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power7.c @@ -0,0 +1,24 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define __strncasecmp __strncasecmp_power7 + +extern __typeof (strncasecmp) __strncasecmp_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S new file mode 100644 index 0000000000..8a24c34719 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S @@ -0,0 +1,28 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define __strncasecmp __strncasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strncase.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c new file mode 100644 index 0000000000..0a75f75745 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strncasecmp for PPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strncasecmp __strncasecmp_ppc + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c new file mode 100644 index 0000000000..197f7133e4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase.c @@ -0,0 +1,36 @@ +/* Multiple versions of strncasecmp + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" + +extern __typeof (__strncasecmp) __libc_strncasecmp; + +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; + +libc_ifunc (__libc_strncasecmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_power7 + : __strncasecmp_ppc); + +weak_alias (__libc_strncasecmp, strncasecmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c new file mode 100644 index 0000000000..f87ff6c640 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l-power7.c @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define __strncasecmp_l __strncasecmp_l_power7 +#define USE_IN_EXTENDED_LOCALE_MODEL 1 + +extern __typeof (strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; + +#include <string/strncase.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c new file mode 100644 index 0000000000..6c2429c58a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncase_l.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncasecmp_l + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# include <string.h> +# define strncasecmp_l __strncasecmp_l_ppc +extern __typeof (__strncasecmp_l) __strncasecmp_l_ppc attribute_hidden; +extern __typeof (__strncasecmp_l) __strncasecmp_l_power7 attribute_hidden; +#endif + +#include <string/strncase_l.c> +#undef strncasecmp_l + +#if IS_IN (libc) +# include <shlib-compat.h> +# include "init-arch.h" + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +extern __typeof (__strncasecmp_l) __libc_strncasecmp_l; +libc_ifunc (__libc_strncasecmp_l, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncasecmp_l_power7 + : __strncasecmp_l_ppc); + +weak_alias (__libc_strncasecmp_l, strncasecmp_l) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c new file mode 100644 index 0000000000..f695f834a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c @@ -0,0 +1,31 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. 
*/ + +#include <string.h> + +#define STRNCAT __strncat_power7 + +extern __typeof (strncat) __strncat_power7 attribute_hidden; +extern __typeof (strlen) __strlen_power7 attribute_hidden; +extern __typeof (strnlen) __strnlen_power7 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; + +#define strlen __strlen_power7 +#define __strnlen __strnlen_power7 +#define memcpy __memcpy_power7 + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c new file mode 100644 index 0000000000..1ec1259b95 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-power8.c @@ -0,0 +1,31 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. 
*/ + +#include <string.h> + +#define STRNCAT __strncat_power8 + +extern __typeof (strncat) __strncat_power8 attribute_hidden; +extern __typeof (strlen) __strlen_power8 attribute_hidden; +extern __typeof (strnlen) __strnlen_power8 attribute_hidden; +extern __typeof (memcpy) __memcpy_power7 attribute_hidden; + +#define strlen __strlen_power8 +#define __strnlen __strnlen_power8 +#define memcpy __memcpy_power7 + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c new file mode 100644 index 0000000000..e4c8c01105 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. 
*/ + +#include <string.h> + +#define STRNCAT __strncat_ppc +#ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strncat_ppc, __GI_strncat, __strncat_ppc); +#endif + +extern __typeof (strncat) __strncat_ppc attribute_hidden; + +#include <string/strncat.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c new file mode 100644 index 0000000000..72f283354e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncat.c @@ -0,0 +1,34 @@ +/* Multiple versions of strncat. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncat) __strncat_ppc attribute_hidden; +extern __typeof (strncat) __strncat_power7 attribute_hidden; +extern __typeof (strncat) __strncat_power8 attribute_hidden; + +libc_ifunc (strncat, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncat_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? 
__strncat_power7 + : __strncat_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S new file mode 100644 index 0000000000..01729a3bba --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power4 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power4/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S new file mode 100644 index 0000000000..a069d4b21c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power7.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S new file mode 100644 index 0000000000..3cbcaada62 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power8.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S new file mode 100644 index 0000000000..6d0deaa6e6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-power9.S @@ -0,0 +1,25 @@ +/* Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCMP __strncmp_power9 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power9/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S new file mode 100644 index 0000000000..e4b93ae8f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S @@ -0,0 +1,28 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#if defined SHARED && IS_IN (libc) +# define STRNCMP __strncmp_ppc + +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + .globl __GI_strncmp; __GI_strncmp = __strncmp_ppc +#endif + +#include <sysdeps/powerpc/powerpc64/strncmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c new file mode 100644 index 0000000000..14122c65a4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -0,0 +1,47 @@ +/* Multiple versions of strncmp. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if defined SHARED && IS_IN (libc) +# define strncmp __redirect_strncmp +/* Omit the strncmp inline definitions because it would redefine strncmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncmp) __strncmp_ppc attribute_hidden; +extern __typeof (strncmp) __strncmp_power4 attribute_hidden; +extern __typeof (strncmp) __strncmp_power7 attribute_hidden; +extern __typeof (strncmp) __strncmp_power8 attribute_hidden; +extern __typeof (strncmp) __strncmp_power9 attribute_hidden; +# undef strncmp + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncmp, strncmp, + (hwcap2 & PPC_FEATURE2_ARCH_3_00) + ? __strncmp_power9 : + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncmp_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncmp_power7 + : (hwcap & PPC_FEATURE_POWER4) + ? __strncmp_power4 + : __strncmp_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S new file mode 100644 index 0000000000..03f7f83448 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power7.S @@ -0,0 +1,28 @@ +/* Optimized strncpy implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCPY __strncpy_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define MEMSET __memset_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S new file mode 100644 index 0000000000..17117eb7ec --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-power8.S @@ -0,0 +1,29 @@ +/* Optimized strncpy implementation for POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNCPY __strncpy_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +/* memset is used to pad the end of the string. 
*/ +#define MEMSET __memset_power8 + +#include <sysdeps/powerpc/powerpc64/power8/strncpy.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c new file mode 100644 index 0000000000..32412974aa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRNCPY __strncpy_ppc +#undef weak_alias +#define weak_alias(name, aliasname) \ + extern __typeof (__strncpy_ppc) aliasname \ + __attribute__ ((weak, alias ("__strncpy_ppc"))); +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strncpy_ppc, __GI_strncpy, __strncpy_ppc); +#endif + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; + +#include <string/strncpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c new file mode 100644 index 0000000000..bb63c185e6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strncpy.c @@ -0,0 +1,42 @@ +/* Multiple versions of strncpy. 
+ Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/ >. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strncpy __redirect_strncpy +/* Omit the strncpy inline definitions because it would redefine strncpy. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strncpy) __strncpy_ppc attribute_hidden; +extern __typeof (strncpy) __strncpy_power7 attribute_hidden; +extern __typeof (strncpy) __strncpy_power8 attribute_hidden; +# undef strncpy + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strncpy, strncpy, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncpy_power8 + : (hwcap & PPC_FEATURE_HAS_VSX) + ? __strncpy_power7 + : __strncpy_ppc); + +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S new file mode 100644 index 0000000000..2f0a183e31 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power7.S @@ -0,0 +1,28 @@ +/* Optimized strnlen version for POWER7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRNLEN __strnlen_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power7/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S new file mode 100644 index 0000000000..ccea15df10 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-power8.S @@ -0,0 +1,28 @@ +/* Optimized strnlen version for POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strnlen __strnlen_power8 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(name, alias) + +#include <sysdeps/powerpc/powerpc64/power8/strnlen.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c new file mode 100644 index 0000000000..708455a156 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/strnlen-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c new file mode 100644 index 0000000000..7f89132aa5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strnlen.c @@ -0,0 +1,41 @@ +/* Multiple versions of strnlen. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define strnlen __redirect_strnlen +# define __strnlen __redirect___strnlen +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (__strnlen) __strnlen_ppc attribute_hidden; +extern __typeof (__strnlen) __strnlen_power7 attribute_hidden; +extern __typeof (__strnlen) __strnlen_power8 attribute_hidden; +# undef strnlen +# undef __strnlen +libc_ifunc_redirected (__redirect___strnlen, __strnlen, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strnlen_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strnlen_power7 + : __strnlen_ppc); +weak_alias (__strnlen, strnlen) + +#else +#include <string/strnlen.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S new file mode 100644 index 0000000000..10bab2ec54 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power7.S @@ -0,0 +1,26 @@ +/* Optimized strrchr implementation for POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRRCHR __strrchr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power7/strrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S new file mode 100644 index 0000000000..23365a1446 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-power8.S @@ -0,0 +1,39 @@ +/* Optimized strrchr implementation for POWER8. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#undef ENTRY +#define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(__strrchr_power8) \ + .align ALIGNARG(2); \ + BODY_LABEL(__strrchr_power8): \ + cfi_startproc; \ + LOCALENTRY(__strrchr_power8) + +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(__strrchr_power8) \ + END_2(__strrchr_power8) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strrchr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c new file mode 100644 index 0000000000..62b77a0bbe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <string.h> + +#define STRRCHR __strrchr_ppc + +#undef weak_alias +#define weak_alias(name, aliasname) + +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strrchr_ppc, __GI_strrchr, __strrchr_ppc); +#endif + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; + +#include <string/strrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c new file mode 100644 index 0000000000..0f94c9d6a1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strrchr.c @@ -0,0 +1,40 @@ +/* Multiple versions of strrchr. PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strrchr __redirect_strrchr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strrchr) __strrchr_ppc attribute_hidden; +extern __typeof (strrchr) __strrchr_power7 attribute_hidden; +extern __typeof (strrchr) __strrchr_power8 attribute_hidden; +#undef strrchr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. 
*/ +libc_ifunc_redirected (__redirect_strrchr, strrchr, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strrchr_power8 : + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strrchr_power7 + : __strrchr_ppc); +weak_alias (strrchr, rindex) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S new file mode 100644 index 0000000000..f8487f1cbc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-power8.S @@ -0,0 +1,25 @@ +/* Optimized strspn implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define STRSPN __strspn_power8 +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c new file mode 100644 index 0000000000..53d3d61651 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c @@ -0,0 +1,25 @@ +/* Default strspn implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define STRSPN __strspn_ppc +#ifdef SHARED +#undef libc_hidden_def +#define libc_hidden_def(name) +#endif + +#include <string/strspn.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c new file mode 100644 index 0000000000..0957482766 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strspn.c @@ -0,0 +1,35 @@ +/* Multiple versions of strspn. PowerPC64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +#undef strspn +extern __typeof (strspn) __libc_strspn; + +extern __typeof (strspn) __strspn_ppc attribute_hidden; +extern __typeof (strspn) __strspn_power8 attribute_hidden; + +libc_ifunc (__libc_strspn, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strspn_power8 + : __strspn_ppc); + +weak_alias (__libc_strspn, strspn) +libc_hidden_builtin_def (strspn) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S new file mode 100644 index 0000000000..3991df74a8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-power7.S @@ -0,0 +1,30 @@ +/* Optimized strstr implementation for POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define STRSTR __strstr_power7 + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define STRLEN __strlen_power7 +#define STRNLEN __strnlen_power7 +#define STRCHR __strchr_power7 + +#include <sysdeps/powerpc/powerpc64/power7/strstr.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c new file mode 100644 index 0000000000..37add12c87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr-ppc64.c @@ -0,0 +1,29 @@ +/* Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRSTR __strstr_ppc +#if IS_IN (libc) && defined(SHARED) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1(__strstr_ppc, __GI_strstr, __strstr_ppc); +#endif + +extern __typeof (strstr) __strstr_ppc attribute_hidden; + +#include <string/strstr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c new file mode 100644 index 0000000000..d903b2702b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/strstr.c @@ -0,0 +1,36 @@ +/* Multiple versions of strstr. PowerPC64 version. 
+ Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Define multiple versions only for definition in libc. */ +#if IS_IN (libc) +# define strstr __redirect_strstr +# include <string.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (strstr) __strstr_ppc attribute_hidden; +extern __typeof (strstr) __strstr_power7 attribute_hidden; +# undef strstr + +/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle + ifunc symbol properly. */ +libc_ifunc_redirected (__redirect_strstr, strstr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strstr_power7 + : __strstr_ppc); +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c new file mode 100644 index 0000000000..080cb696a7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power6.c @@ -0,0 +1,19 @@ +/* wcschr.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c new file mode 100644 index 0000000000..8f4de0e857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-power7.c @@ -0,0 +1,19 @@ +/* wcschr.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c new file mode 100644 index 0000000000..e781e947fe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c new file mode 100644 index 0000000000..ca373e096f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcschr.c @@ -0,0 +1,43 @@ +/* Multiple versions of wcschr + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# define wcschr __redirect_wcschr +# define __wcschr __redirect___wcschr +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcschr) __wcschr_ppc attribute_hidden; +extern __typeof (wcschr) __wcschr_power6 attribute_hidden; +extern __typeof (wcschr) __wcschr_power7 attribute_hidden; +# undef wcschr +# undef __wcschr + +libc_ifunc_redirected (__redirect___wcschr, __wcschr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcschr_power7 + : (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcschr_power6 + : __wcschr_ppc); +weak_alias (__wcschr, wcschr) +#else +#undef libc_hidden_def +#define libc_hidden_def(a) +#include <wcsmbs/wcschr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c new file mode 100644 index 0000000000..89d8a39640 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power6.c @@ -0,0 +1,19 @@ +/* wcscpy.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c new file mode 100644 index 0000000000..47ba73b2cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-power7.c @@ -0,0 +1,19 @@ +/* wcscpy.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c new file mode 100644 index 0000000000..1924b235ef --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c new file mode 100644 index 0000000000..13e44afb09 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcscpy.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcscpy. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcscpy) __wcscpy_ppc attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power6 attribute_hidden; +extern __typeof (wcscpy) __wcscpy_power7 attribute_hidden; + +libc_ifunc (wcscpy, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcscpy_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcscpy_power6 + : __wcscpy_ppc); +#else +#include <wcsmbs/wcscpy.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c new file mode 100644 index 0000000000..5dc448b339 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power6.c @@ -0,0 +1,19 @@ +/* wcsrchr.c - Wide Character Search for powerpc64/power6. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. 
If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power6.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c new file mode 100644 index 0000000000..fa25aa0475 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-power7.c @@ -0,0 +1,19 @@ +/* wcsrchr.c - Wide Character Search for powerpc64/power7. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-power7.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c new file mode 100644 index 0000000000..8a913412a0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c new file mode 100644 index 0000000000..07590f5a90 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c @@ -0,0 +1,36 @@ +/* Multiple versions of wcsrchr. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if IS_IN (libc) +# include <wchar.h> +# include <shlib-compat.h> +# include "init-arch.h" + +extern __typeof (wcsrchr) __wcsrchr_ppc attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power6 attribute_hidden; +extern __typeof (wcsrchr) __wcsrchr_power7 attribute_hidden; + +libc_ifunc (wcsrchr, + (hwcap & PPC_FEATURE_HAS_VSX) + ? __wcsrchr_power7 : + (hwcap & PPC_FEATURE_ARCH_2_05) + ? __wcsrchr_power6 + : __wcsrchr_ppc); +#else +#include <wcsmbs/wcsrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c new file mode 100644 index 0000000000..078156f5d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/multiarch/wordcopy-ppc64.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy-ppc32.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies new file mode 100644 index 0000000000..a372141bb7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Implies @@ -0,0 +1,2 @@ +powerpc/power4/fpu +powerpc/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile new file mode 100644 index 0000000000..ba06adb5d0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/Makefile @@ -0,0 +1,6 @@ +# Makefile fragment for POWER4/5/5+. + +ifeq ($(subdir),string) +CFLAGS-wordcopy.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +CFLAGS-memmove.c += --param max-variable-expansions-in-unroller=2 --param max-unroll-times=2 -funroll-loops -fpeel-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies new file mode 100644 index 0000000000..c1f617b7da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies new file mode 100644 index 0000000000..8d6531a174 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S new file mode 100644 index 0000000000..6ca98e909c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcmp.S @@ -0,0 +1,1369 @@ +/* Optimized memcmp implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + + .machine power4 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + + xor r0, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 12 + clrldi. r0, r0, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + std rWORD8, -8(r1) + std rWORD7, -16(r1) + cfi_offset(rWORD8, -8) + cfi_offset(rWORD7, -16) + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. 
r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8 */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16 */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. 
*/ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24 */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder */ + srdi r0, rN, 5 /* Divide by 32 */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8 */ + .align 4 +L(dP1): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. 
*/ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) +#endif + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +L(dP1e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 16 */ + .align 4 +L(dP2): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 0(rSTR1) + ld rWORD6, 0(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 +L(dP2e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) +/* Again we are on a early exit path (16-23 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP2x): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne L(d00) + li rRTN, 0 + blr + +/* Remainder is 24 */ + .align 4 +L(dP3): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 0(rSTR1) + ld rWORD4, 0(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +L(dP3e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 8(rSTR1) + ld rWORD6, 8(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 16(rSTR1) + ld rWORD8, 16(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 24(rSTR1) + ld rWORD2, 24(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne cr7, L(dLcr7x) + bne L(d00) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 +L(dP4e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, 
rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + bne- cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz+ L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(zeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. */ +L(d00): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr7x): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr1x): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr6x): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dLcr5x): + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ +#if 0 +/* Huh? We've already branched on cr6! */ + beq- cr6, L(zeroLength) +#endif + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. 
Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. */ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz- L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz- L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz- L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne- cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz- L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne- cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz- L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne- cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz+ L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. 
*/ +L(b1i): + bne- cr7, L(bLcr7) + bne- cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne- cr6, L(bLcr6) + bne- cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne- cr1, L(bLcr1) + bne- cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne- cr7, L(bx12) + bne- cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne- cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not already DW aligned yet. + So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected resister pair. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. 
*/ +L(unaligned): + std rSHL, -24(r1) + cfi_offset(rSHL, -24) + clrldi rSHL, rSTR2, 61 + beq- cr6, L(duzeroLength) + std rSHR, -32(r1) + cfi_offset(rSHR, -32) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, -40(r1) + cfi_offset(rWORD8_SHIFT, -40) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, -48(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, -56(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, -64(r1) + cfi_offset(rWORD2_SHIFT, -48) + cfi_offset(rWORD4_SHIFT, -56) + cfi_offset(rWORD6_SHIFT, -64) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. 
*/ + li rWORD8, 0 + blt cr5, L(dus0) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD8, 0(rSTR2) + addi rSTR2, rSTR2, 8 +#endif + sld rWORD8, rWORD8, rSHL + +L(dus0): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8 */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24 */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. 
*/ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, -40(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, -48(r1) + srdi r0, rN, 5 /* Divide by 32 */ + std rWORD4_SHIFT, -56(r1) + andi. r12, rN, 24 /* Get the DW remainder */ + std rWORD6_SHIFT, -64(r1) + cfi_offset(rWORD8_SHIFT, -40) + cfi_offset(rWORD2_SHIFT, -48) + cfi_offset(rWORD4_SHIFT, -56) + cfi_offset(rWORD6_SHIFT, -64) + sldi rSHL, rSHL, 3 +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD6, 0, rSTR2 + addi rSTR2, rSTR2, 8 + ldbrx rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD6, 0(rSTR2) + ldu rWORD8, 8(rSTR2) +#endif + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8 */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD7, 0(rSTR1) +#endif + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or 
rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD5, 0(rSTR1) +#endif + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 8(rSTR1) + ld rWORD8, 8(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 16(rSTR1) + ld rWORD2, 16(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 24(rSTR1) + ld rWORD4, 24(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#endif + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 8 + addi 
rSTR2, rSTR2, 8 +#endif + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24 */ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD3, 0(rSTR1) +#endif + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 8(rSTR1) + ld rWORD6, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD7, 16(rSTR1) + ld rWORD8, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 24(rSTR1) + ld rWORD2, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): +#ifndef __LITTLE_ENDIAN__ + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#endif +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 /* Power4 wants mtctr 1st in dispatch group */ + srd r0, rWORD8, rSHR +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + addi rSTR1, rSTR1, 8 +#else + ld rWORD1, 0(rSTR1) +#endif + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 8(rSTR1) + ld rWORD4, 8(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 16(rSTR1) + ld rWORD6, 16(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 24(rSTR1) + ldu rWORD8, 24(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz- L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 + ldbrx rWORD2, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD1, 8(rSTR1) + ld rWORD2, 8(rSTR2) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): 
+#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD3, 0, rSTR1 + ldbrx rWORD4, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD3, 16(rSTR1) + ld rWORD4, 16(rSTR2) +#endif + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD5, 0, rSTR1 + ldbrx rWORD6, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ld rWORD5, 24(rSTR1) + ld rWORD6, 24(rSTR2) +#endif + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD7, 0, rSTR1 + ldbrx rWORD8, 0, rSTR2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 +#else + ldu rWORD7, 32(rSTR1) + ldu rWORD8, 32(rSTR2) +#endif + cmpld cr7, rWORD1, rWORD2 + bne- cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz+ L(duLoop) + +L(duL4): +#if 0 +/* Huh? We've already branched on cr1! */ + bne cr1, L(duLcr1) +#endif + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). 
*/ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD2, 0, rSTR2 + addi rSTR2, rSTR2, 8 +#else + ld rWORD2, 8(rSTR2) +#endif + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): +#ifdef __LITTLE_ENDIAN__ + ldbrx rWORD1, 0, rSTR1 +#else + ld rWORD1, 8(rSTR1) +#endif + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, -16(r1) + ld rSHL, -24(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, -32(r1) + ld rWORD8_SHIFT, -40(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, -48(r1) + ld rWORD4_SHIFT, -56(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, -64(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, -24(r1) + ld rSHR, -32(r1) + li rRTN, -1 + b L(dureturn27) + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, -8(r1) + ld rWORD7, -16(r1) +L(dureturn29): + ld rSHL, -24(r1) + ld rSHR, -32(r1) +L(dureturn27): + ld rWORD8_SHIFT, -40(r1) +L(dureturn26): + ld rWORD2_SHIFT, -48(r1) +L(dureturn25): + ld rWORD4_SHIFT, -56(r1) +L(dureturn24): + ld rWORD6_SHIFT, -64(r1) + blr +L(duzeroLength): + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h 
b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h new file mode 100644 index 0000000000..9a4ff79f4a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcopy.h @@ -0,0 +1 @@ +#include "../../powerpc32/power4/memcopy.h" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S new file mode 100644 index 0000000000..2e96376b9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memcpy.S @@ -0,0 +1,477 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job + of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. 
Further optimization is + possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + .machine power4 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ + clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + cmpld cr6,10,11 + mr 12,4 + srdi 9,5,3 /* Number of full double words remaining. */ + mtcrf 0x01,0 + mr 31,5 + beq .L0 + + subf 31,0,5 + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* check alignment of src again. */ + srdi 9,31,3 /* Number of full double words remaining. */ + + /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ +.L0: + clrldi 11,31,61 + mtcrf 0x01,9 + cmpldi cr1,11,0 + bne- cr6,.L6 /* If source is not DW aligned. */ + + /* Move doublewords where destination and source are DW aligned. + Use a unrolled loop to copy 4 doubleword (32-bytes) per iteration. + If the copy is not an exact multiple of 32 bytes, 1-3 + doublewords are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-7 bytes. 
These bytes are + copied a word/halfword/byte at a time as needed to preserve alignment. */ + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + .align 4 +1: + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 +2: + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f +.L9: + add 3,3,0 + add 12,12,0 + +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr + +/* Copy up to 31 bytes. This is divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 64-byte, and + 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 64-byte + boundaries. Since only loads are sensitive to the 32-/64-byte + boundaries it is more important to align the source than the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to ensure that all loads are aligned. 
+ While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small delay. */ + + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmpldi cr1,5,16 + mr 10,5 + mr 12,4 + cmpldi cr6,0,2 + beq .L3 /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmpldi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +.L3: +/* At least 6 bytes left and the source is word aligned. */ + blt cr1,8f +16: /* Move 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 6,8(12) + stw 7,4(3) + lwz 7,12(12) + addi 12,12,16 + stw 6,8(3) + stw 7,12(3) + addi 3,3,16 +8: /* Move 8 bytes. */ + bf 28,4f + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Move 4 bytes. */ + bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Move 2-3 bytes. */ + bf 30,1f + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + blr +1: /* Move 1 byte. */ + bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,4f +/* Would have liked to use use ld/std here but the 630 processors are + slow for load/store doubles that are not at least word aligned. 
+ Unaligned Load/Store word execute with only a 1 cycle penalty. */ + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + /* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +4: bf 29,2b + lwz 6,0(4) + stw 6,0(3) +6: + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +5: + bf 31,0f + lbz 6,4(4) + stb 6,4(3) + .align 4 +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + + .align 4 +.L6: + + /* Copy doublewords where the destination is aligned but the source is + not. Use aligned doubleword loads from the source, shifted to realign + the data, to allow aligned destination stores. */ + addi 11,9,-1 /* loop DW count is one less than total */ + subf 5,10,12 + sldi 10,10,3 + mr 4,3 + srdi 8,11,2 /* calculate the 32 byte loop count */ + ld 6,0(5) + mtcrf 0x01,11 + cmpldi cr6,9,4 + mtctr 8 + ld 7,8(5) + subfic 9,10,64 + bf 30,1f + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 + sld 8,6,9 +#else + sld 0,7,10 + srd 8,6,9 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,8f /* if total DWs = 3, then bypass loop */ + bf 31,4f + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,8f /* if total DWs = 4, then bypass loop */ + b 4f + .align 4 +1: +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,4f + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +4: +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 + sld 8,6,9 +#else + 
sld 0,7,10 + srd 8,6,9 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srd 0,7,10 + sld 8,6,9 +#else + sld 0,7,10 + srd 8,6,9 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ 4b + .align 4 +8: + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srd 0,6,10 + sld 8,7,9 +#else + sld 0,6,10 + srd 8,7,9 +#endif + or 0,0,8 + std 0,0(4) +3: + rldicr 0,31,0,60 + mtcrf 0x01,31 + bne cr1,.L9 /* If the tail is 0 bytes we are done! */ + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S new file mode 100644 index 0000000000..a57214e0b0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/memset.S @@ -0,0 +1,251 @@ +/* Optimized memset implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. 
+ + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + .machine power4 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 + +#define rNEG64 r8 /* Constant -64 for clearing with dcbz. */ +#define rCLS r8 /* Cache line size obtained from static. */ +#define rCLM r9 /* Cache line size mask to check for cache alignment. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble- cr1, L(small) + +/* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 + insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. 
rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ +L(nondcbz): + srdi rTMP, rALIGN, 5 + mtctr rTMP + beq L(medium) /* We may not actually get to do a full line. */ + clrldi. rLEN, rLEN, 59 + add rMEMP, rMEMP, rALIGN + li rNEG64, -0x40 + bdz L(cloopdone) + +L(c3): dcbtst rNEG64, rMEMP + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + bdnz L(c3) +L(cloopdone): + std rCHR, -8(rMEMP) + std rCHR, -16(rMEMP) + cmpldi cr1, rLEN, 16 + std rCHR, -24(rMEMP) + stdu rCHR, -32(rMEMP) + beqlr + add rMEMP, rMEMP, rALIGN + b L(medium_tail2) + + .align 5 +/* Clear lines of memory in 128-byte chunks. */ +L(zloopstart): +/* If the remaining length is less the 32 bytes, don't bother getting + the cache line size. */ + beq L(medium) + li rCLS,128 /* cache line size is 128 */ + +/* Now we know the cache line size, and it is not 32-bytes, but + we may not yet be aligned to the cache line. May have a partial + line to fill, so touch it 1st. */ + dcbt 0,rMEMP +L(getCacheAligned): + cmpldi cr1,rLEN,32 + andi. rTMP,rMEMP,127 + blt cr1,L(handletail32) + beq L(cacheAligned) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,-32(rMEMP) + std rCHR,-24(rMEMP) + std rCHR,-16(rMEMP) + std rCHR,-8(rMEMP) + b L(getCacheAligned) + +/* Now we are aligned to the cache line and can use dcbz. 
*/ +L(cacheAligned): + cmpld cr1,rLEN,rCLS + blt cr1,L(handletail32) + dcbz 0,rMEMP + subf rLEN,rCLS,rLEN + add rMEMP,rMEMP,rCLS + b L(cacheAligned) + +/* We are here because the cache line size was set and was not 32-bytes + and the remainder (rLEN) is less than the actual cache line size. + So set up the preconditions for L(nondcbz) and go there. */ +L(handletail32): + clrrwi. rALIGN, rLEN, 5 + b L(nondcbz) + + .align 5 +L(small): +/* Memset of 8 bytes or less. */ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt- 29, L(medium_29t) +L(medium_29f): + bge- cr1, L(medium_27t) + bflr- 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt- cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr- 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. 
*/ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies new file mode 100644 index 0000000000..30edcf7f9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S new file mode 100644 index 0000000000..2b0c00dfb2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power4/strncmp.S @@ -0,0 +1,225 @@ +/* Optimized strncmp implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* See strlen.s for comments on how the end-of-string testing works. 
*/ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +EALIGN (STRNCMP, 4, 0) + CALL_MCOUNT 3 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrldi. rTMP, rTMP, 61 + cmpldi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. first a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP, rN, 3 + clrldi rN, rN, 61 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmpldi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) + sldi rTMP, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP + b L(g1) + +L(g0): + ldu rWORD1, 8(rSTR1) + bne- cr1, L(different) + ldu rWORD2, 8(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. 
*/ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 8 + bne- cr1, L(different) + addi rSTR2, rSTR2, 8 + cmpldi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + ble cr1, L(ux) +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + .align 4 +L(u1): + cmpdi cr1, rWORD1, 0 + bdz L(u4) + cmpd rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpdi cr1, rWORD3, 0 + bdz L(u3) + cmpd rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpdi cr1, rWORD1, 0 + bdz L(u4) + cmpd rWORD1, rWORD2 + beq- cr1, L(u4) + bne- L(u4) + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + cmpdi cr1, rWORD3, 0 + bdz L(u3) + cmpd rWORD3, rWORD4 + beq- cr1, L(u3) + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + b L(u1) + +L(u3): sub rRTN, rWORD3, rWORD4 + blr +L(u4): sub rRTN, rWORD1, rWORD2 + blr +L(ux): + li rRTN, 0 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies new file mode 100644 index 0000000000..565bc94471 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/Implies @@ -0,0 +1,4 @@ +powerpc/power5+/fpu +powerpc/power5+ +powerpc/powerpc64/power5/fpu +powerpc/powerpc64/power5 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies new file mode 100644 index 0000000000..f00c50fb49 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies new file mode 100644 index 0000000000..c0e67848e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/multiarch/Implies @@ -0,0 +1 @@ 
+powerpc/powerpc64/power5/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S new file mode 100644 index 0000000000..39b7ee78e5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceil.S @@ -0,0 +1,37 @@ +/* ceil function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__ceil, 4, 0) + CALL_MCOUNT 0 + frip fp1, fp1 + blr + END (__ceil) + +weak_alias (__ceil, ceil) + +#ifdef NO_LONG_DOUBLE +weak_alias (__ceil, ceill) +strong_alias (__ceil, __ceill) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __ceil, ceill, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S new file mode 100644 index 0000000000..d1c6f26d6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_ceilf.S @@ -0,0 +1,30 @@ +/* ceilf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__ceilf, 4, 0) + CALL_MCOUNT 0 + frip fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set overflow for float. */ + blr + END (__ceilf) + +weak_alias (__ceilf, ceilf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S new file mode 100644 index 0000000000..6411f15633 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floor.S @@ -0,0 +1,37 @@ +/* floor function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__floor, 4, 0) + CALL_MCOUNT 0 + frim fp1, fp1 + blr + END (__floor) + +weak_alias (__floor, floor) + +#ifdef NO_LONG_DOUBLE +weak_alias (__floor, floorl) +strong_alias (__floor, __floorl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0) +compat_symbol (libm, __floor, floorl, GLIBC_2_0) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S new file mode 100644 index 0000000000..26c3b2594b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_floorf.S @@ -0,0 +1,30 @@ +/* floorf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__floorf, 4, 0) + CALL_MCOUNT 0 + frim fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. 
*/ + blr + END (__floorf) + +weak_alias (__floorf, floorf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S new file mode 100644 index 0000000000..909714b449 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_llround.S @@ -0,0 +1,58 @@ +/* llround function. POWER5+, PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long [r3] llround (float x [fp1]) + IEEE 1003.1 llround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit 0).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power5" +EALIGN (__llround, 4, 0) + CALL_MCOUNT 0 + frin fp2, fp1 /* Round to nearest +-0.5. */ + fctidz fp3, fp2 /* Convert To Integer DW round toward 0. 
*/ + stfd fp3, -16(r1) + nop /* Ensure the following load is in a different dispatch group */ + nop /* to avoid pipe stall on POWER4&5. */ + nop + ld r3, -16(r1) + blr + END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S new file mode 100644 index 0000000000..dc46d20f4f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_round.S @@ -0,0 +1,37 @@ +/* round function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__round, 4, 0) + CALL_MCOUNT 0 + frin fp1, fp1 + blr + END (__round) + +weak_alias (__round, round) + +#ifdef NO_LONG_DOUBLE +weak_alias (__round, roundl) +strong_alias (__round, __roundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __round, roundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S new file mode 100644 index 0000000000..0a587843ad --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_roundf.S @@ -0,0 +1,30 @@ +/* roundf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__roundf, 4, 0) + CALL_MCOUNT 0 + frin fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. 
*/ + blr + END (__roundf) + +weak_alias (__roundf, roundf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S new file mode 100644 index 0000000000..7f8290e408 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_trunc.S @@ -0,0 +1,37 @@ +/* trunc function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power5" +EALIGN (__trunc, 4, 0) + CALL_MCOUNT 0 + friz fp1, fp1 + blr + END (__trunc) + +weak_alias (__trunc, trunc) + +#ifdef NO_LONG_DOUBLE +weak_alias (__trunc, truncl) +strong_alias (__trunc, __truncl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S new file mode 100644 index 0000000000..07f5d33127 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/fpu/s_truncf.S @@ -0,0 +1,30 @@ +/* truncf function. PowerPC64/power5+ version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .machine "power5" +EALIGN (__truncf, 4, 0) + CALL_MCOUNT 0 + friz fp1, fp1 /* The rounding instructions are double. */ + frsp fp1, fp1 /* But we need to set ooverflow for float. */ + blr + END (__truncf) + +weak_alias (__truncf, truncf) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies new file mode 100644 index 0000000000..0851b19fa2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5+/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies new file mode 100644 index 0000000000..bedb20b65c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power4/fpu +powerpc/powerpc64/power4 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies new file mode 100644 index 0000000000..6b8c23efa6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power4/fpu/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies 
b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies new file mode 100644 index 0000000000..3740d050a6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power4/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S new file mode 100644 index 0000000000..d6a829ea37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/fpu/s_isnan.S @@ -0,0 +1,60 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power5 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + stfd fp1,-8(r1) /* copy FPR to GPR */ + lis r0,0x7ff0 + nop /* ensure the following is in a different */ + nop /* dispatch group */ + ld r4,-8(r1) + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + clrldi r4,r4,1 /* x = fabs(x) */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. 
*/ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies new file mode 100644 index 0000000000..9a3cbb0938 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power5/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power4/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies new file mode 100644 index 0000000000..4c782d4122 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power5+/fpu +powerpc/powerpc64/power5+ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies new file mode 100644 index 0000000000..f09854edb6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5+/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies new file mode 100644 index 0000000000..fca8a4ef0f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5+/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S new file mode 100644 index 0000000000..ec36d1be5b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysign.S @@ -0,0 +1,58 @@ +/* copysign(). PowerPC64/POWER6 version. 
+ Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* double [f1] copysign (double [f1] x, double [f2] y); + copysign(x,y) returns a value with the magnitude of x and + with the sign bit of y. */ + + .section ".text" + .type __copysign, @function + .machine power6 +EALIGN (__copysign, 4, 0) + CALL_MCOUNT 0 + fcpsgn fp1,fp2,fp1 + blr +END (__copysign) + +hidden_def (__copysign) +weak_alias (__copysign, copysign) + +/* It turns out that the 'double' version will also always work for + single-precision. 
*/ +strong_alias (__copysign, __copysignf) +hidden_def (__copysignf) +weak_alias (__copysignf, copysignf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__copysign, __copysignl) +weak_alias (__copysign, copysignl) +#endif + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, copysign, copysignl, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, copysign, copysignl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S new file mode 100644 index 0000000000..d4aa702d07 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_copysignf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_copysign.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S new file mode 100644 index 0000000000..85187b45f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/fpu/s_isnan.S @@ -0,0 +1,59 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power6 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + stfd fp1,-8(r1) /* copy FPR to GPR */ + ori r1,r1,0 + ld r4,-8(r1) + lis r0,0x7ff0 + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + clrldi r4,r4,1 /* x = fabs(x) */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S new file mode 100644 index 0000000000..1f7294b8ed --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memcpy.S @@ -0,0 +1,1499 @@ +/* Optimized memcpy implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. + + Memcpy handles short copies (< 32-bytes) using a binary move blocks + (no loops) of lwz/stw. The tail (remaining 1-3) bytes is handled + with the appropriate combination of byte and halfword load/stores. + There is minimal effort to optimize the alignment of short moves. + The 64-bit implementations of POWER3 and POWER4 do a reasonable job + of handling unaligned load/stores that do not cross 32-byte boundaries. + + Longer moves (>= 32-bytes) justify the effort to get at least the + destination doubleword (8-byte) aligned. Further optimization is + possible when both source and destination are doubleword aligned. + Each case has a optimized unrolled loop. + + For POWER6 unaligned loads will take a 20+ cycle hiccup for any + L1 cache miss that crosses a 32- or 128-byte boundary. Store + is more forgiving and does not take a hiccup until page or + segment boundaries. So we require doubleword alignment for + the source but may take a risk and only require word alignment + for the destination. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + .machine "power6" +EALIGN (MEMCPY, 7, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + andi. 11,3,7 /* check alignment of dst. */ + clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ + clrldi 10,4,61 /* check alignment of src. */ + cmpldi cr6,5,8 + ble- cr1,.L2 /* If move < 32 bytes use short move code. */ + mtcrf 0x01,0 + cmpld cr6,10,11 + srdi 9,5,3 /* Number of full double words remaining. */ + beq .L0 + + subf 5,0,5 + /* Move 0-7 bytes as needed to get the destination doubleword aligned. + Duplicate some code to maximize fall-through and minimize agen delays. 
*/ +1: bf 31,2f + lbz 6,0(4) + stb 6,0(3) + bf 30,5f + lhz 6,1(4) + sth 6,1(3) + bf 29,0f + lwz 6,3(4) + stw 6,3(3) + b 0f +5: + bf 29,0f + lwz 6,1(4) + stw 6,1(3) + b 0f + +2: bf 30,4f + lhz 6,0(4) + sth 6,0(3) + bf 29,0f + lwz 6,2(4) + stw 6,2(3) + b 0f + +4: bf 29,0f + lwz 6,0(4) + stw 6,0(3) +0: +/* Add the number of bytes until the 1st doubleword of dst to src and dst. */ + add 4,4,0 + add 3,3,0 + + clrldi 10,4,61 /* check alignment of src again. */ + srdi 9,5,3 /* Number of full double words remaining. */ + + /* Copy doublewords from source to destination, assuming the + destination is aligned on a doubleword boundary. + + At this point we know there are at least 25 bytes left (32-7) to copy. + The next step is to determine if the source is also doubleword aligned. + If not branch to the unaligned move code at .L6. which uses + a load, shift, store strategy. + + Otherwise source and destination are doubleword aligned, and we can + the optimized doubleword copy loop. */ + .align 4 +.L0: + clrldi 11,5,61 + andi. 0,5,0x78 + srdi 12,5,7 /* Number of 128-byte blocks to move. */ + cmpldi cr1,11,0 /* If the tail is 0 bytes */ + bne- cr6,.L6 /* If source is not DW aligned. */ + + /* Move doublewords where destination and source are DW aligned. + Use a unrolled loop to copy 16 doublewords (128-bytes) per iteration. + If the copy is not an exact multiple of 128 bytes, 1-15 + doublewords are copied as needed to set up the main loop. After + the main loop exits there may be a tail of 1-7 bytes. These byte + are copied a word/halfword/byte at a time as needed to preserve + alignment. + + For POWER6 the L1 is store-through and the L2 is store-in. The + L2 is clocked at half CPU clock so we can store 16 bytes every + other cycle. POWER6 also has a load/store bypass so we can do + load, load, store, store every 2 cycles. + + The following code is sensitive to cache line alignment. 
Do not + make any change with out first making sure they don't result in + splitting ld/std pairs across a cache line. */ + + mtcrf 0x02,5 + mtcrf 0x01,5 + cmpldi cr5,12,1 + beq L(das_loop) + + bf 25,4f + .align 3 + ld 6,0(4) + ld 7,8(4) + mr 11,4 + mr 10,3 + std 6,0(3) + std 7,8(3) + ld 6,16(4) + ld 7,24(4) + std 6,16(3) + std 7,24(3) + ld 6,0+32(4) + ld 7,8+32(4) + addi 4,4,64 + addi 3,3,64 + std 6,0+32(10) + std 7,8+32(10) + ld 6,16+32(11) + ld 7,24+32(11) + std 6,16+32(10) + std 7,24+32(10) +4: + mr 10,3 + bf 26,2f + ld 6,0(4) + ld 7,8(4) + mr 11,4 + nop + std 6,0(3) + std 7,8(3) + ld 6,16(4) + ld 7,24(4) + addi 4,4,32 + std 6,16(3) + std 7,24(3) + addi 3,3,32 +6: + nop + bf 27,5f + ld 6,0+32(11) + ld 7,8+32(11) + addi 4,4,16 + addi 3,3,16 + std 6,0+32(10) + std 7,8+32(10) + bf 28,L(das_loop_s) + ld 0,16+32(11) + addi 4,4,8 + addi 3,3,8 + std 0,16+32(10) + blt cr5,L(das_tail) + b L(das_loop) + .align 3 +5: + nop + bf 28,L(das_loop_s) + ld 6,32(11) + addi 4,4,8 + addi 3,3,8 + std 6,32(10) + blt cr5,L(das_tail) + b L(das_loop) + .align 3 +2: + mr 11,4 + bf 27,1f + ld 6,0(4) + ld 7,8(4) + addi 4,4,16 + addi 3,3,16 + std 6,0(10) + std 7,8(10) + bf 28,L(das_loop_s) + ld 0,16(11) + addi 4,11,24 + addi 3,10,24 + std 0,16(10) + blt cr5,L(das_tail) + b L(das_loop) + .align 3 +1: + nop + bf 28,L(das_loop_s) + ld 6,0(4) + addi 4,4,8 + addi 3,3,8 + std 6,0(10) +L(das_loop_s): + nop + blt cr5,L(das_tail) + .align 4 +L(das_loop): + ld 6,0(4) + ld 7,8(4) + mr 10,3 + mr 11,4 + std 6,0(3) + std 7,8(3) + addi 12,12,-1 + nop + ld 8,16(4) + ld 0,24(4) + std 8,16(3) + std 0,24(3) + + ld 6,0+32(4) + ld 7,8+32(4) + std 6,0+32(3) + std 7,8+32(3) + ld 8,16+32(4) + ld 0,24+32(4) + std 8,16+32(3) + std 0,24+32(3) + + ld 6,0+64(11) + ld 7,8+64(11) + std 6,0+64(10) + std 7,8+64(10) + ld 8,16+64(11) + ld 0,24+64(11) + std 8,16+64(10) + std 0,24+64(10) + + ld 6,0+96(11) + ld 7,8+96(11) + addi 4,4,128 + addi 3,3,128 + std 6,0+96(10) + std 7,8+96(10) + ld 8,16+96(11) + ld 0,24+96(11) + std 
8,16+96(10) + std 0,24+96(10) + ble cr5,L(das_loop_e) + + mtctr 12 + .align 4 +L(das_loop2): + ld 6,0(4) + ld 7,8(4) + mr 10,3 + mr 11,4 + std 6,0(3) + std 7,8(3) + ld 8,16(4) + ld 0,24(4) + std 8,16(3) + std 0,24(3) + + ld 6,0+32(4) + ld 7,8+32(4) + std 6,0+32(3) + std 7,8+32(3) + ld 8,16+32(4) + ld 0,24+32(4) + std 8,16+32(3) + std 0,24+32(3) + + ld 6,0+64(11) + ld 7,8+64(11) + std 6,0+64(10) + std 7,8+64(10) + ld 8,16+64(11) + ld 0,24+64(11) + std 8,16+64(10) + std 0,24+64(10) + + ld 6,0+96(11) + ld 7,8+96(11) + addi 4,4,128 + addi 3,3,128 + std 6,0+96(10) + std 7,8+96(10) + ld 8,16+96(11) + ld 0,24+96(11) + std 8,16+96(10) + std 0,24+96(10) + bdnz L(das_loop2) +L(das_loop_e): +/* Check of a 1-7 byte tail, return if none. */ + bne cr1,L(das_tail2) +/* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +L(das_tail): + beq cr1,0f + +L(das_tail2): +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. */ +4: bf 29,2f + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 6,4(4) + sth 6,4(3) + bf 31,0f + lbz 6,6(4) + stb 6,6(3) + b 0f +5: bf 31,0f + lbz 6,4(4) + stb 6,4(3) + b 0f + +2: bf 30,1f + lhz 6,0(4) + sth 6,0(3) + bf 31,0f + lbz 6,2(4) + stb 6,2(3) + b 0f + +1: bf 31,0f + lbz 6,0(4) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 3,-16(1) + blr + +/* Copy up to 31 bytes. This divided into two cases 0-8 bytes and 9-31 + bytes. Each case is handled without loops, using binary (1,2,4,8) + tests. + + In the short (0-8 byte) case no attempt is made to force alignment + of either source or destination. The hardware will handle the + unaligned load/stores with small delays for crossing 32- 128-byte, + and 4096-byte boundaries. Since these short moves are unlikely to be + unaligned or cross these boundaries, the overhead to force + alignment is not justified. + + The longer (9-31 byte) move is more likely to cross 32- or 128-byte + boundaries. 
Since only loads are sensitive to the 32-/128-byte + boundaries it is more important to align the source then the + destination. If the source is not already word aligned, we first + move 1-3 bytes as needed. Since we are only word aligned we don't + use double word load/stores to insure that all loads are aligned. + While the destination and stores may still be unaligned, this + is only an issue for page (4096 byte boundary) crossing, which + should be rare for these short moves. The hardware handles this + case automatically with a small (~20 cycle) delay. */ + .align 4 +.L2: + mtcrf 0x01,5 + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + ble cr6,.LE8 /* Handle moves of 0-8 bytes. */ +/* At least 9 bytes left. Get the source word aligned. */ + cmpldi cr1,5,16 + mr 10,5 + mr 12,4 + cmpldi cr6,0,2 + beq L(dus_tail) /* If the source is already word aligned skip this. */ +/* Copy 1-3 bytes to get source address word aligned. */ + lwz 6,0(11) + subf 10,0,5 + add 12,4,0 + blt cr6,5f + srdi 7,6,16 + bgt cr6,3f +#ifdef __LITTLE_ENDIAN__ + sth 7,0(3) +#else + sth 6,0(3) +#endif + b 7f + .align 4 +3: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,24 + stb 6,0(3) + sth 7,1(3) +#else + stb 7,0(3) + sth 6,1(3) +#endif + b 7f + .align 4 +5: +#ifdef __LITTLE_ENDIAN__ + rotlwi 6,6,8 +#endif + stb 6,0(3) +7: + cmpldi cr1,10,16 + add 3,3,0 + mtcrf 0x01,10 + .align 4 +L(dus_tail): +/* At least 6 bytes left and the source is word aligned. This allows + some speculative loads up front. */ +/* We need to special case the fall-through because the biggest delays + are due to address computation not being ready in time for the + AGEN. */ + lwz 6,0(12) + lwz 7,4(12) + blt cr1,L(dus_tail8) + cmpldi cr0,10,24 +L(dus_tail16): /* Move 16 bytes. */ + stw 6,0(3) + stw 7,4(3) + lwz 6,8(12) + lwz 7,12(12) + stw 6,8(3) + stw 7,12(3) +/* Move 8 bytes more. */ + bf 28,L(dus_tail16p8) + cmpldi cr1,10,28 + lwz 6,16(12) + lwz 7,20(12) + stw 6,16(3) + stw 7,20(3) +/* Move 4 bytes more. 
*/ + bf 29,L(dus_tail16p4) + lwz 6,24(12) + stw 6,24(3) + addi 12,12,28 + addi 3,3,28 + bgt cr1,L(dus_tail2) + /* exactly 28 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail16p8): /* less than 8 bytes left. */ + beq cr1,L(dus_tailX) /* exactly 16 bytes, early exit. */ + cmpldi cr1,10,20 + bf 29,L(dus_tail16p2) +/* Move 4 bytes more. */ + lwz 6,16(12) + stw 6,16(3) + addi 12,12,20 + addi 3,3,20 + bgt cr1,L(dus_tail2) + /* exactly 20 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail16p4): /* less than 4 bytes left. */ + addi 12,12,24 + addi 3,3,24 + bgt cr0,L(dus_tail2) + /* exactly 24 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail16p2): /* 16 bytes moved, less than 4 bytes left. */ + addi 12,12,16 + addi 3,3,16 + b L(dus_tail2) + + .align 4 +L(dus_tail8): /* Move 8 bytes. */ +/* r6, r7 already loaded speculatively. */ + cmpldi cr1,10,8 + cmpldi cr0,10,12 + bf 28,L(dus_tail4) + .align 2 + stw 6,0(3) + stw 7,4(3) +/* Move 4 bytes more. */ + bf 29,L(dus_tail8p4) + lwz 6,8(12) + stw 6,8(3) + addi 12,12,12 + addi 3,3,12 + bgt cr0,L(dus_tail2) + /* exactly 12 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + .align 4 +L(dus_tail8p4): /* less than 4 bytes left. */ + addi 12,12,8 + addi 3,3,8 + bgt cr1,L(dus_tail2) + /* exactly 8 bytes. Return original dst pointer and exit. */ + ld 3,-16(1) + blr + + .align 4 +L(dus_tail4): /* Move 4 bytes. */ +/* r6 already loaded speculatively. If we are here we know there is + more than 4 bytes left. So there is no need to test. */ + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +L(dus_tail2): /* Move 2-3 bytes. */ + bf 30,L(dus_tail1) + lhz 6,0(12) + sth 6,0(3) + bf 31,L(dus_tailX) + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + blr +L(dus_tail1): /* Move 1 byte. */ + bf 31,L(dus_tailX) + lbz 6,0(12) + stb 6,0(3) +L(dus_tailX): + /* Return original dst pointer. 
*/ + ld 3,-16(1) + blr + +/* Special case to copy 0-8 bytes. */ + .align 4 +.LE8: + mr 12,4 + bne cr6,L(dus_4) +/* Exactly 8 bytes. We may cross a 32-/128-byte boundary and take a ~20 + cycle delay. This case should be rare and any attempt to avoid this + would take most of 20 cycles any way. */ + ld 6,0(4) + std 6,0(3) + /* Return original dst pointer. */ + ld 3,-16(1) + blr + .align 4 +L(dus_4): + bf 29,L(dus_tail2) + lwz 6,0(4) + stw 6,0(3) + bf 30,L(dus_5) + lhz 7,4(4) + sth 7,4(3) + bf 31,L(dus_0) + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + blr + .align 4 +L(dus_5): + bf 31,L(dus_0) + lbz 6,4(4) + stb 6,4(3) +L(dus_0): + /* Return original dst pointer. */ + ld 3,-16(1) + blr + + .align 4 +.L6: + cfi_offset(31,-8) + mr 12,4 + mr 31,5 + /* Copy doublewords where the destination is aligned but the source is + not. Use aligned doubleword loads from the source, shifted to realign + the data, to allow aligned destination stores. */ + addi 11,9,-1 /* loop DW count is one less than total */ + subf 5,10,12 /* Move source addr to previous full double word. */ + cmpldi cr5, 10, 2 + cmpldi cr0, 10, 4 + mr 4,3 + srdi 8,11,2 /* calculate the 32 byte loop count */ + ld 6,0(5) /* pre load 1st full doubleword. */ + mtcrf 0x01,11 + cmpldi cr6,9,4 + mtctr 8 + ld 7,8(5) /* pre load 2nd full doubleword. 
*/ + bge cr0, L(du4_do) + blt cr5, L(du1_do) + beq cr5, L(du2_do) + b L(du3_do) + + .align 4 +L(du1_do): + bf 30,L(du1_1dw) + + /* there are at least two DWs to copy */ + /* FIXME: can combine last shift and "or" into "rldimi" */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du1_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du1_fini) /* if total DWs = 4, then bypass loop */ + b L(du1_loop) + .align 4 +L(du1_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du1_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du1_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 8 + sldi 8,6, 64-8 +#else + sldi 0,7, 8 + srdi 8,6, 64-8 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du1_loop) + .align 4 +L(du1_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 8 + sldi 8,7, 64-8 +#else + sldi 0,6, 8 + srdi 8,7, 64-8 +#endif + or 0,0,8 + std 0,0(4) 
+ b L(du_done) + + .align 4 +L(du2_do): + bf 30,L(du2_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du2_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du2_fini) /* if total DWs = 4, then bypass loop */ + b L(du2_loop) + .align 4 +L(du2_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du2_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du2_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 16 + sldi 8,6, 64-16 +#else + sldi 0,7, 16 + srdi 8,6, 64-16 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du2_loop) + .align 4 +L(du2_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 16 + sldi 8,7, 64-16 +#else + sldi 0,6, 16 + srdi 8,7, 64-16 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du3_do): + bf 30,L(du3_1dw) + + /* there are at least two 
DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du3_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du3_fini) /* if total DWs = 4, then bypass loop */ + b L(du3_loop) + .align 4 +L(du3_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du3_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du3_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 24 + sldi 8,6, 64-24 +#else + sldi 0,7, 24 + srdi 8,6, 64-24 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du3_loop) + .align 4 +L(du3_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 24 + sldi 8,7, 64-24 +#else + sldi 0,6, 24 + srdi 8,7, 64-24 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du4_do): + cmpldi cr5, 10, 6 + beq cr0, L(du4_dox) + blt cr5, L(du5_do) + beq cr5, L(du6_do) + b L(du7_do) +L(du4_dox): + bf 30,L(du4_1dw) + + /* 
there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du4_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du4_fini) /* if total DWs = 4, then bypass loop */ + b L(du4_loop) + .align 4 +L(du4_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du4_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du4_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 32 + sldi 8,6, 64-32 +#else + sldi 0,7, 32 + srdi 8,6, 64-32 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du4_loop) + .align 4 +L(du4_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 32 + sldi 8,7, 64-32 +#else + sldi 0,6, 32 + srdi 8,7, 64-32 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du5_do): + bf 30,L(du5_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 
64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du5_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du5_fini) /* if total DWs = 4, then bypass loop */ + b L(du5_loop) + .align 4 +L(du5_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du5_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du5_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 40 + sldi 8,6, 64-40 +#else + sldi 0,7, 40 + srdi 8,6, 64-40 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du5_loop) + .align 4 +L(du5_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 40 + sldi 8,7, 64-40 +#else + sldi 0,6, 40 + srdi 8,7, 64-40 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du6_do): + bf 30,L(du6_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) 
+#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du6_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du6_fini) /* if total DWs = 4, then bypass loop */ + b L(du6_loop) + .align 4 +L(du6_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du6_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du6_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 48 + sldi 8,6, 64-48 +#else + sldi 0,7, 48 + srdi 8,6, 64-48 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du6_loop) + .align 4 +L(du6_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 48 + sldi 8,7, 64-48 +#else + sldi 0,6, 48 + srdi 8,7, 64-48 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du7_do): + bf 30,L(du7_1dw) + + /* there are at least two DWs to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,16(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 
8,6, 64-56 +#endif + or 0,0,8 + ld 7,24(5) + std 0,8(4) + addi 4,4,16 + addi 5,5,32 + blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */ + bf 31,L(du7_loop) + /* there is a third DW to copy */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + std 0,0(4) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + addi 4,4,8 + beq cr6,L(du7_fini) /* if total DWs = 4, then bypass loop */ + b L(du7_loop) + .align 4 +L(du7_1dw): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + addi 5,5,16 + or 0,0,8 + bf 31,L(du7_loop) + mr 6,7 + ld 7,0(5) + addi 5,5,8 + std 0,0(4) + addi 4,4,8 + .align 4 +/* copy 32 bytes at a time */ +L(du7_loop): +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,0(5) + std 0,0(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,8(5) + std 0,8(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + ld 6,16(5) + std 0,16(4) +#ifdef __LITTLE_ENDIAN__ + srdi 0,7, 56 + sldi 8,6, 64-56 +#else + sldi 0,7, 56 + srdi 8,6, 64-56 +#endif + or 0,0,8 + ld 7,24(5) + std 0,24(4) + addi 5,5,32 + addi 4,4,32 + bdnz+ L(du7_loop) + .align 4 +L(du7_fini): + /* calculate and store the final DW */ +#ifdef __LITTLE_ENDIAN__ + srdi 0,6, 56 + sldi 8,7, 64-56 +#else + sldi 0,6, 56 + srdi 8,7, 64-56 +#endif + or 0,0,8 + std 0,0(4) + b L(du_done) + + .align 4 +L(du_done): + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr1,0f /* If the tail is 0 bytes we are done! */ + + add 3,3,0 + add 12,12,0 +/* At this point we have a tail of 0-7 bytes and we know that the + destination is double word aligned. 
*/ +4: bf 29,2f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: bf 30,1f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,0f + lbz 6,0(12) + stb 6,0(3) +0: + /* Return original dst pointer. */ + ld 31,-8(1) + ld 3,-16(1) + blr +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S new file mode 100644 index 0000000000..aee1c8eabb --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/memset.S @@ -0,0 +1,395 @@ +/* Optimized 64-bit memset implementation for POWER6. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. + + The memset is done in three sizes: byte (8 bits), word (32 bits), + cache line (256 bits). There is a special case for setting cache lines + to 0, to take advantage of the dcbz instruction. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + .machine power6 +EALIGN (MEMSET, 7, 0) + CALL_MCOUNT 3 + +#define rTMP r0 +#define rRTN r3 /* Initial value of 1st argument. */ +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. 
*/ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ +#define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ +#define rMEMP2 r8 +#define rMEMP3 r9 /* Alt mem pointer. */ +L(_memset): +/* Take care of case for size <= 4. */ + cmpldi cr1, rLEN, 8 + andi. rALIGN, rMEMP0, 7 + mr rMEMP, rMEMP0 + ble cr1, L(small) + +/* Align to doubleword boundary. */ + cmpldi cr5, rLEN, 31 + insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */ + beq+ L(aligned2) + mtcrf 0x01, rMEMP0 + subfic rALIGN, rALIGN, 8 + cror 28,30,31 /* Detect odd word aligned. */ + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ + bt 29, L(g4) +/* Process the even word of doubleword. */ + bf+ 31, L(g2) + stb rCHR, 0(rMEMP0) + bt 30, L(g4x) +L(g2): + sth rCHR, -6(rMEMP) +L(g4x): + stw rCHR, -4(rMEMP) + b L(aligned) +/* Process the odd word of doubleword. */ +L(g4): + bf 28, L(g4x) /* If false, word aligned on odd word. */ + bf+ 31, L(g0) + stb rCHR, 0(rMEMP0) + bt 30, L(aligned) +L(g0): + sth rCHR, -2(rMEMP) + +/* Handle the case of size < 31. */ +L(aligned2): + insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */ +L(aligned): + mtcrf 0x01, rLEN + ble cr5, L(medium) +/* Align to 32-byte boundary. */ + andi. rALIGN, rMEMP, 0x18 + subfic rALIGN, rALIGN, 0x20 + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */ + beq L(caligned) + mtcrf 0x01, rALIGN + add rMEMP, rMEMP, rALIGN + sub rLEN, rLEN, rALIGN + cmplwi cr1, rALIGN, 0x10 + mr rMEMP2, rMEMP + bf 28, L(a1) + stdu rCHR, -8(rMEMP2) +L(a1): blt cr1, L(a2) + std rCHR, -8(rMEMP2) + stdu rCHR, -16(rMEMP2) +L(a2): + +/* Now aligned to a 32 byte boundary. */ + .align 4 +L(caligned): + cmpldi cr1, rCHR, 0 + clrrdi. rALIGN, rLEN, 5 + mtcrf 0x01, rLEN + beq cr1, L(zloopstart) /* Special case for clearing memory using dcbz. */ + beq L(medium) /* We may not actually get to do a full line. 
*/ + .align 4 +/* Storing a non-zero "c" value. We are aligned at a sector (32-byte) + boundary may not be at cache line (128-byte) boundary. */ +L(nzloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmpldi cr1,rLEN,128 + + andi. rTMP,rMEMP,127 + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + std rCHR,-8(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP3) + addi rMEMP,rMEMP,32 + std rCHR,8(rMEMP3) + andi. rTMP,rMEMP,127 + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) + + beq L(nzCacheAligned) + addi rLEN,rLEN,-32 + std rCHR,32(rMEMP3) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,128 + std rCHR,40(rMEMP3) + cmpldi cr6,rLEN,256 + li rMEMP2,128 + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + blt cr1,L(cacheAligned1) + b L(nzCacheAligned128) + +/* Now we are aligned to the cache line and can use dcbtst. */ + .align 4 +L(nzCacheAligned): + cmpldi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + b L(nzCacheAligned128) + .align 5 +L(nzCacheAligned128): + cmpldi cr1,rLEN,256 + addi rMEMP3,rMEMP,64 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + std rCHR,24(rMEMP) + std rCHR,32(rMEMP) + std rCHR,40(rMEMP) + std rCHR,48(rMEMP) + std rCHR,56(rMEMP) + addi rMEMP,rMEMP3,64 + addi rLEN,rLEN,-128 + std rCHR,0(rMEMP3) + std rCHR,8(rMEMP3) + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) + std rCHR,32(rMEMP3) + std rCHR,40(rMEMP3) + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + bge cr1,L(nzCacheAligned128) + dcbtst 0,rMEMP + b L(cacheAligned1) + .align 5 +/* Storing a zero "c" value. We are aligned at a sector (32-byte) + boundary but may not be at cache line (128-byte) boundary. 
If the + remaining length spans a full cache line we can use the Data cache + block zero instruction. */ +L(zloopstart): +/* memset in 32-byte chunks until we get to a cache line boundary. + If rLEN is less than the distance to the next cache-line boundary use + cacheAligned1 code to finish the tail. */ + cmpldi cr1,rLEN,128 + beq L(medium) +L(getCacheAligned): + andi. rTMP,rMEMP,127 + nop + blt cr1,L(cacheAligned1) + addi rMEMP3,rMEMP,32 + beq L(cacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP3,127 + std rCHR,-8(rMEMP3) +L(getCacheAligned2): + beq L(cacheAligned) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP3) + std rCHR,8(rMEMP3) + addi rMEMP,rMEMP,32 + andi. rTMP,rMEMP,127 + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) +L(getCacheAligned3): + beq L(cacheAligned) + addi rLEN,rLEN,-32 + std rCHR,32(rMEMP3) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,128 + std rCHR,40(rMEMP3) + cmpldi cr6,rLEN,256 + li rMEMP2,128 + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAlignedx) + +/* Now we are aligned to the cache line and can use dcbz. */ + .align 5 +L(cacheAligned): + cmpldi cr1,rLEN,128 + cmpldi cr6,rLEN,256 + blt cr1,L(cacheAligned1) + li rMEMP2,128 +L(cacheAlignedx): + cmpldi cr5,rLEN,640 + blt cr6,L(cacheAligned128) + bgt cr5,L(cacheAligned512) + cmpldi cr6,rLEN,512 + dcbz 0,rMEMP + cmpldi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + blt cr1,L(cacheAligned1) + blt cr6,L(cacheAligned128) + b L(cacheAligned256) + .align 5 +/* A simple loop for the longer (>640 bytes) lengths. 
This form limits + the branch miss-predicted to exactly 1 at loop exit.*/ +L(cacheAligned512): + cmpldi cr1,rLEN,128 + blt cr1,L(cacheAligned1) + dcbz 0,rMEMP + addi rLEN,rLEN,-128 + addi rMEMP,rMEMP,128 + b L(cacheAligned512) + .align 5 +L(cacheAligned256): + + cmpldi cr6,rLEN,512 + + dcbz 0,rMEMP + cmpldi cr1,rLEN,384 + dcbz rMEMP2,rMEMP + addi rMEMP,rMEMP,256 + addi rLEN,rLEN,-256 + + bge cr6,L(cacheAligned256) + + blt cr1,L(cacheAligned1) + .align 4 +L(cacheAligned128): + dcbz 0,rMEMP + addi rMEMP,rMEMP,128 + addi rLEN,rLEN,-128 + nop +L(cacheAligned1): + cmpldi cr1,rLEN,32 + blt cr1,L(handletail32) + addi rMEMP3,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP) + std rCHR,8(rMEMP) + std rCHR,16(rMEMP) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,32 + std rCHR,-8(rMEMP3) +L(cacheAligned2): + blt cr1,L(handletail32) + addi rLEN,rLEN,-32 + std rCHR,0(rMEMP3) + std rCHR,8(rMEMP3) + addi rMEMP,rMEMP,32 + cmpldi cr1,rLEN,32 + std rCHR,16(rMEMP3) + std rCHR,24(rMEMP3) + nop +L(cacheAligned3): + blt cr1,L(handletail32) + addi rMEMP,rMEMP,32 + addi rLEN,rLEN,-32 + std rCHR,32(rMEMP3) + std rCHR,40(rMEMP3) + std rCHR,48(rMEMP3) + std rCHR,56(rMEMP3) + +/* We are here because the length or remainder (rLEN) is less than the + cache line/sector size and does not justify aggressive loop unrolling. + So set up the preconditions for L(medium) and go there. */ + .align 3 +L(handletail32): + cmpldi cr1,rLEN,0 + beqlr cr1 + b L(medium) + + .align 5 +L(small): +/* Memset of 8 bytes or less. */ + cmpldi cr6, rLEN, 4 + cmpldi cr5, rLEN, 1 + ble cr6,L(le4) + subi rLEN, rLEN, 4 + stb rCHR,0(rMEMP) + stb rCHR,1(rMEMP) + stb rCHR,2(rMEMP) + stb rCHR,3(rMEMP) + addi rMEMP,rMEMP, 4 + cmpldi cr5, rLEN, 1 +L(le4): + cmpldi cr1, rLEN, 3 + bltlr cr5 + stb rCHR, 0(rMEMP) + beqlr cr5 + stb rCHR, 1(rMEMP) + bltlr cr1 + stb rCHR, 2(rMEMP) + beqlr cr1 + stb rCHR, 3(rMEMP) + blr + +/* Memset of 0-31 bytes. */ + .align 5 +L(medium): + insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. 
*/ + cmpldi cr1, rLEN, 16 +L(medium_tail2): + add rMEMP, rMEMP, rLEN +L(medium_tail): + bt- 31, L(medium_31t) + bt- 30, L(medium_30t) +L(medium_30f): + bt 29, L(medium_29t) +L(medium_29f): + bge cr1, L(medium_27t) + bflr 28 + std rCHR, -8(rMEMP) + blr + +L(medium_31t): + stbu rCHR, -1(rMEMP) + bf- 30, L(medium_30f) +L(medium_30t): + sthu rCHR, -2(rMEMP) + bf- 29, L(medium_29f) +L(medium_29t): + stwu rCHR, -4(rMEMP) + blt cr1, L(medium_27f) +L(medium_27t): + std rCHR, -8(rMEMP) + stdu rCHR, -16(rMEMP) +L(medium_27f): + bflr 28 +L(medium_28t): + std rCHR, -8(rMEMP) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies new file mode 100644 index 0000000000..2ebe304fa6 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power5+/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c new file mode 100644 index 0000000000..ae04a130cc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcschr.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcschr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c new file mode 100644 index 0000000000..722c8f995b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcscpy.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcscpy.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c new file mode 100644 index 0000000000..b86472d7bd --- /dev/null +++ 
b/REORG.TODO/sysdeps/powerpc/powerpc64/power6/wcsrchr.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power6/wcsrchr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies new file mode 100644 index 0000000000..9d68f39d22 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power6/fpu +powerpc/powerpc64/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies new file mode 100644 index 0000000000..30fa17646e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies new file mode 100644 index 0000000000..410d289a6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S new file mode 100644 index 0000000000..b6e11ba0c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_isnan.S @@ -0,0 +1,58 @@ +/* isnan(). PowerPC64 version. + Copyright (C) 2008-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .machine power6 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + mftgpr r4,fp1 /* copy FPR to GPR */ + lis r0,0x7ff0 + ori r1,r1,0 + clrldi r4,r4,1 /* x = fabs(x) */ + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000 */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + li r3,0 /* then return 0 */ + blelr+ cr7 + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S new file mode 100644 index 0000000000..37aa69061c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llrint.S @@ -0,0 +1,44 @@ +/* Round double to long int. POWER6x PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + .machine "power6" +/* long long int[r3] __llrint (double x[fp1]) */ +ENTRY (__llrint) + CALL_MCOUNT 0 + fctid fp13,fp1 + mftgpr r3,fp13 + blr + END (__llrint) + +strong_alias (__llrint, __lrint) +weak_alias (__llrint, llrint) +weak_alias (__lrint, lrint) + +#ifdef NO_LONG_DOUBLE +strong_alias (__llrint, __llrintl) +weak_alias (__llrint, llrintl) +strong_alias (__lrint, __lrintl) +weak_alias (__lrint, lrintl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llrint, llrintl, GLIBC_2_1) +compat_symbol (libm, __lrint, lrintl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S new file mode 100644 index 0000000000..62e1798785 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/fpu/s_llround.S @@ -0,0 +1,54 @@ +/* llround function. POWER6x PowerPC64 version. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* long long [r3] llround (float x [fp1]) + IEEE 1003.1 llround function. IEEE specifies "round to the nearest + integer value, rounding halfway cases away from zero, regardless of + the current rounding mode." However PowerPC Architecture defines + "round to Nearest" as "Choose the best approximation. In case of a + tie, choose the one that is even (least significant bit o).". + So we pre-round using the V2.02 Floating Round to Integer Nearest + instruction before we use Floating Convert to Integer Word with + round to zero instruction. */ + + .machine "power6" +ENTRY (__llround) + CALL_MCOUNT 0 + frin fp2,fp1 /* Round to nearest +-0.5. */ + fctidz fp3,fp2 /* Convert To Integer DW round toward 0. */ + mftgpr r3,fp3 /* Transfer integer to R3. */ + blr + END (__llround) + +strong_alias (__llround, __lround) +weak_alias (__llround, llround) +weak_alias (__lround, lround) + +#ifdef NO_LONG_DOUBLE +weak_alias (__llround, llroundl) +strong_alias (__llround, __llroundl) +weak_alias (__lround, lroundl) +strong_alias (__lround, __lroundl) +#endif +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __llround, llroundl, GLIBC_2_1) +compat_symbol (libm, __lround, lroundl, GLIBC_2_1) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies new file mode 100644 index 0000000000..bf5d6171a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power6x/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies new file mode 100644 index 0000000000..9d68f39d22 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power6/fpu +powerpc/powerpc64/power6 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile 
b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile new file mode 100644 index 0000000000..89a2296085 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/Makefile @@ -0,0 +1,11 @@ +ifeq ($(subdir),elf) +# Prevent the use of VSX registers and insns in _dl_start, which under -O3 +# optimization may require a TOC reference before relocations are resolved. +CFLAGS-rtld.c += -mno-vsx +endif + +ifeq ($(subdir),string) +sysdep_routines += strstr-ppc64 +CFLAGS-strncase.c += -funroll-loops +CFLAGS-strncase_l.c += -funroll-loops +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S new file mode 100644 index 0000000000..6425afbc9f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/add_n.S @@ -0,0 +1,98 @@ +/* PowerPC64 mpn_lshift -- mpn_add_n/mpn_sub_n -- mpn addition and + subtraction. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* cycles/limb + * POWER7 2.18 + */ + +#ifdef USE_AS_SUB +# define FUNC __mpn_sub_n +# define ADDSUBC subfe +#else +# define FUNC __mpn_add_n +# define ADDSUBC adde +#endif + +#define RP r3 +#define UP r4 +#define VP r5 +#define N r6 + +EALIGN(FUNC, 5, 0) +#ifdef USE_AS_SUB + addic r0, r0, 0 +#else + addic r0, r1, -1 +#endif + andi. r7, N, 1 + beq L(bx0) + + ld r7, 0(UP) + ld r9, r0(VP) + ADDSUBC r11, r9, r7 + std r11, r0(RP) + cmpldi N, N, 1 + beq N, L(end) + addi UP, UP, 8 + addi VP, VP, 8 + addi RP, RP, 8 + +L(bx0): addi r0, N, 2 + srdi r0, r0, 2 + mtctr r0 + + andi. r7, N, 2 + bne L(mid) + + addi UP, UP, 16 + addi VP, VP, 16 + addi RP, RP, 16 + + .align 5 +L(top): ld r6, -16(UP) + ld r7, -8(UP) + ld r8, -16(VP) + ld r9, -8(VP) + ADDSUBC r10, r8, N + ADDSUBC r11, r9, r7 + std r10, -16(RP) + std r11, -8(RP) +L(mid): ld r6, 0(UP) + ld r7, 8(UP) + ld r8, 0(VP) + ld r9, 8(VP) + ADDSUBC r10, r8, N + ADDSUBC r11, r9, r7 + std r10, 0(RP) + std r11, 8(RP) + addi UP, UP, 32 + addi VP, VP, 32 + addi RP, RP, 32 + bdnz L(top) + +L(end): subfe r3, r0, r0 +#ifdef USE_AS_SUB + neg r3, r3 +#else + addi r3, r3, 1 +#endif + blr +END(FUNC) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c new file mode 100644 index 0000000000..4a6a400e7a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/bcopy.c @@ -0,0 +1 @@ +/* Implemented at memmove.S */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies new file mode 100644 index 0000000000..30fa17646e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies new file mode 100644 index 0000000000..410d289a6d --- /dev/null +++ 
b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S new file mode 100644 index 0000000000..9ccc758c9e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S @@ -0,0 +1,70 @@ +/* finite(). PowerPC64/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __finite(x) */ + .section ".toc","aw" +.LC0: /* 1.0 */ + .tc FD_ONE[TC],0x3ff0000000000000 + .section ".text" + .type __finite, @function + .machine power7 +EALIGN (__finite, 4, 0) + CALL_MCOUNT 0 + lfd fp0,.LC0@toc(r2) + ftdiv cr7,fp1,fp0 + li r3,1 + bflr 30 + + /* If we are here, we either have +/-INF, + NaN or denormal. */ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value + (biased exponent and sign bit). */ + clrlwi r4,r4,17 /* r4 = abs(r4). */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */ + bltlr cr7 /* LT means finite, other non-finite. 
*/ + li r3,0 + blr + END (__finite) + +hidden_def (__finite) +weak_alias (__finite, finite) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__finite, __finitef) +hidden_def (__finitef) +weak_alias (__finitef, finitef) + +#if IS_IN (libm) +# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0) +compat_symbol (libm, __finite, __finitel, GLIBC_2_0) +compat_symbol (libm, finite, finitel, GLIBC_2_0) +# endif +#else +# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0) +compat_symbol (libc, __finite, __finitel, GLIBC_2_0); +compat_symbol (libc, finite, finitel, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S new file mode 100644 index 0000000000..54bd94176d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S @@ -0,0 +1 @@ +/* This function uses the same code as s_finite.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S new file mode 100644 index 0000000000..4482cddcfa --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S @@ -0,0 +1,69 @@ +/* isinf(). PowerPC64/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isinf(x) */ + .section ".toc","aw" +.LC0: /* 1.0 */ + .tc FD_ONE[TC],0x3ff0000000000000 + .section ".text" + .type __isinf, @function + .machine power7 +EALIGN (__isinf, 4, 0) + CALL_MCOUNT 0 + lfd fp0,.LC0@toc(r2) + ftdiv cr7,fp1,fp0 + li r3,0 + bflr 29 /* If not INF, return. */ + + /* Either we have -INF/+INF or a denormal. */ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value + (biased exponent and sign bit). */ + cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */ + li r3,1 + beqlr cr7 /* EQ means INF, otherwise -INF. */ + li r3,-1 + blr + END (__isinf) + +hidden_def (__isinf) +weak_alias (__isinf, isinf) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isinf, __isinff) +hidden_def (__isinff) +weak_alias (__isinff, isinff) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isinf, __isinfl) +weak_alias (__isinf, isinfl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0); +compat_symbol (libc, isinf, isinfl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S new file mode 100644 index 0000000000..be759e091e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isinf.S. 
*/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S new file mode 100644 index 0000000000..46b08a0d37 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S @@ -0,0 +1,68 @@ +/* isnan(). PowerPC64/POWER7 version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +/* int __isnan(x) */ + .section ".toc","aw" +.LC0: /* 1.0 */ + .tc FD_ONE[TC],0x3ff0000000000000 + .section ".text" + .type __isnan, @function + .machine power7 +EALIGN (__isnan, 4, 0) + CALL_MCOUNT 0 + lfd fp0,.LC0@toc(r2) + ftdiv cr7,fp1,fp0 + li r3,0 + bflr 30 /* If not NaN, finish. */ + + stfd fp1,-16(r1) /* Transfer FP to GPR's. */ + ori 2,2,0 /* Force a new dispatch group. */ + ld r4,-16(r1) /* Load FP into GPR. */ + lis r0,0x7ff0 + sldi r0,r0,32 /* const long r0 0x7ff00000 00000000. */ + clrldi r4,r4,1 /* x = fabs(x) */ + cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */ + blelr cr7 /* LE means not NaN. 
*/ + li r3,1 /* else return 1 */ + blr + END (__isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +/* It turns out that the 'double' version will also always work for + single-precision. */ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S new file mode 100644 index 0000000000..b48c85e0d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c new file mode 100644 index 0000000000..2599c771d9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logb.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c new file mode 100644 index 0000000000..7a5a8032e0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbf.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logbf.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c new file mode 100644 index 0000000000..524ae2c78d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/fpu/s_logbl.c @@ -0,0 +1 @@ +#include <sysdeps/powerpc/power7/fpu/s_logbl.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S new file mode 
100644 index 0000000000..5e9707aa02 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memchr.S @@ -0,0 +1,199 @@ +/* Optimized memchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */ + +#ifndef MEMCHR +# define MEMCHR __memchr +#endif + .machine power7 +ENTRY (MEMCHR) + CALL_MCOUNT 3 + dcbt 0,r3 + clrrdi r8,r3,3 + insrdi r4,r4,8,48 + + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + r7 = r3 + r5 + r7 |= -(r7 < x) */ + add r7,r3,r5 + subfc r6,r3,r7 + subfe r9,r9,r9 + extsw r6,r9 + or r7,r7,r6 + + insrdi r4,r4,16,32 + cmpldi r5,32 + li r9, -1 + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + insrdi r4,r4,32,0 + addi r7,r7,-1 +#ifdef __LITTLE_ENDIAN__ + sld r9,r9,r6 +#else + srd r9,r9,r6 +#endif + ble L(small_range) + + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */ + and r3,r3,r9 + clrldi r5,r7,61 /* Byte count - 1 in last dword. */ + clrrdi r7,r7,3 /* Address of last doubleword. */ + cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? 
*/ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bt 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the last byte of the string, ie. the dword at + (s + size - 1) & ~7, or r7. The first dword read is at + r8 + 8, we read 2 * cnt dwords, so the last dword read will + be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives + cnt = (r7 - r8) / 16 */ + sub r6,r7,r8 + srdi r6,r6,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for BYTE in the string. Since + it's a small loop (8 instructions), align it to 32-bytes. */ + .align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r6,r9,r3 /* Merge everything in one doubleword. */ + cmpldi cr7,r6,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more dword to read. */ + cmpld r8,r7 + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + bne cr6,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the doublewords contains BYTE. Check + the first doubleword and decrement the address in case the first + doubleword really contains BYTE. */ + cmpldi cr6,r3,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* BYTE must be in the second doubleword. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,8 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + doubleword from the string. Use that to calculate the pointer. 
+ We need to make sure BYTE is *before* the end of the range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r3,-1 + andc r0,r0,r3 + popcntd r0,r0 /* Count trailing zeros. */ +#else + cntlzd r0,r3 /* Count leading zeros before the match. */ +#endif + cmpld r8,r7 /* Are we on the last dword? */ + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r8,r0 + cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */ + bnelr + blelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r5,0 + beq L(null) + ld r12,0(r8) /* Load word from memory. */ + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 + clrldi r5,r7,61 /* Byte count - 1 in last dword. */ + clrrdi r7,r7,3 /* Address of last doubleword. */ + cmpld r8,r7 /* Are we done already? */ + bne cr7,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) /* Found something. */ + beqlr /* Hit end of string (length). */ + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + cmpld r8,r7 + bne cr6,L(done) + beqlr + + ldu r12,8(r8) + cmpb r3,r12,r4 + cmpldi cr6,r3,0 + bne cr6,L(done) + blr + +END (MEMCHR) +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S new file mode 100644 index 0000000000..96ce8cee25 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcmp.S @@ -0,0 +1,1061 @@ +/* Optimized memcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + .machine power7 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r8 /* next word in s1 */ +#define rWORD4 r9 /* next word in s2 */ +#define rWORD5 r10 /* next word in s1 */ +#define rWORD6 r11 /* next word in s2 */ + +#define rOFF8 r20 /* 8 bytes offset. */ +#define rOFF16 r21 /* 16 bytes offset. */ +#define rOFF24 r22 /* 24 bytes offset. */ +#define rOFF32 r23 /* 24 bytes offset. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rSHL r29 /* Unaligned shift left count. 
*/ +#define rWORD7 r30 /* next word in s1 */ +#define rWORD8 r31 /* next word in s2 */ + +#define rWORD8SAVE (-8) +#define rWORD7SAVE (-16) +#define rOFF8SAVE (-24) +#define rOFF16SAVE (-32) +#define rOFF24SAVE (-40) +#define rOFF32SAVE (-48) +#define rSHRSAVE (-56) +#define rSHLSAVE (-64) +#define rWORD8SHIFTSAVE (-72) +#define rWORD2SHIFTSAVE (-80) +#define rWORD4SHIFTSAVE (-88) +#define rWORD6SHIFTSAVE (-96) + +#ifdef __LITTLE_ENDIAN__ +# define LD ldbrx +#else +# define LD ldx +#endif + + xor r0, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 12 + clrldi. r0, r0, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 +/* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + + bne L(unaligned) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. + + Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. 
This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(samealignment): + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8 */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16 */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24 */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dPs4): + mtctr r0 + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder */ + srdi r0, rN, 5 /* Divide by 32 */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8 */ + .align 4 +L(dP1): + mtctr r0 +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. 
This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP1e): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 16 */ + .align 4 +L(dP2): + mtctr r0 + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 +L(dP2e): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + .align 4 +L(dP2x): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 24 */ + .align 4 +L(dP3): + mtctr r0 + LD rWORD3, 0, rSTR1 + LD rWORD4, 0, rSTR2 + cmpld cr1, rWORD3, rWORD4 +L(dP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne cr7, L(dLcr7x) + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(dP4): + mtctr r0 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(dLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(duzeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. 
*/ +L(d00): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr7x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr1x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr6x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr5x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. 
*/ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz L(bLoop) + +/* We speculatively load bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before all the pending bytes are + tested). In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr7, L(bLcr7) + bne cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6, L(bLcr6) + bne cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne cr1, L(bLcr1) + bne cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne cr7, L(bx12) + bne cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rSTR1 is double word + aligned and we can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not already DW aligned yet. 
+ So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ +L(unaligned): + std rSHL, rSHLSAVE(r1) + cfi_offset(rSHL, rSHLSAVE) + clrldi rSHL, rSTR2, 61 + beq cr6, L(duzeroLength) + std rSHR, rSHRSAVE(r1) + cfi_offset(rSHR, rSHRSAVE) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. */ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32 */ + andi. r12, rN, 24 /* Get the DW remainder */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. 
*/ + li rWORD8, 0 + blt cr5, L(dus0) + LD rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 + sld rWORD8, rWORD8, rSHL + +L(dus0): + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8 */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24 */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duPs4): + mtctr r0 + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + srdi r0, rN, 5 /* Divide by 32 */ + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + andi. 
r12, rN, 24 /* Get the DW remainder */ + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + sldi rSHL, rSHL, 3 + LD rWORD6, 0, rSTR2 + LD rWORD8, rOFF8, rSTR2 + addi rSTR2, rSTR2, 8 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8 */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR + LD rWORD7, 0, rSTR1 + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16 */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR + LD rWORD5, 0, rSTR1 + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(duLcr6) + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24 */ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR + LD rWORD3, 0, rSTR1 + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. 
rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0 */ + .align 4 +L(duP4): + mtctr r0 + srd r0, rWORD8, rSHR + LD rWORD1, 0, rSTR1 + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4 */ +/* This is the primary loop */ + .align 4 +L(duLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + cmpld cr7, rWORD1, rWORD2 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + +L(duL4): + cmpld cr1, rWORD3, 
rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): + LD rWORD1, rOFF8, rSTR1 + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, rWORD7SAVE(r1) + ld rSHL, rSHLSAVE(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, rSHRSAVE(r1) + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, rSHLSAVE(r1) 
+ ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dureturn29): + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) +L(dureturn27): + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) +L(dureturn24): + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + blr + +L(duzeroLength): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +END (MEMCMP) +libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S new file mode 100644 index 0000000000..e08993cbc3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memcpy.S @@ -0,0 +1,430 @@ +/* Optimized memcpy implementation for PowerPC64/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst'. */ + +#ifndef MEMCPY +# define MEMCPY memcpy +#endif + +#define dst 11 /* Use r11 so r3 kept unchanged. */ +#define src 4 +#define cnt 5 + + .machine power7 +EALIGN (MEMCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,cnt,31 + neg 0,3 + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + +/* Align copies using VSX instructions to quadword. It is to avoid alignment + traps when memcpy is used on non-cacheable memory (for instance, memory + mapped I/O). */ + andi. 10,3,15 + clrldi 11,4,60 + cmpld cr6,10,11 /* SRC and DST alignments match? */ + + mr dst,3 + bne cr6,L(copy_GE_32_unaligned) + beq L(aligned_copy) + + mtocrf 0x01,0 + clrldi 0,0,60 + +/* Get the DST and SRC aligned to 16 bytes. */ +1: + bf 31,2f + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 +2: + bf 30,4f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +4: + bf 29,8f + lwz 6,0(src) + addi src,src,4 + stw 6,0(dst) + addi dst,dst,4 +8: + bf 28,16f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +16: + subf cnt,0,cnt + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy): + li 6,16 + li 7,32 + li 8,48 + mtocrf 0x02,cnt + srdi 12,cnt,7 + cmpdi 12,0 + beq L(aligned_tail) + lxvd2x 6,0,src + lxvd2x 7,src,6 + mtctr 12 + b L(aligned_128loop) + + .align 4 +L(aligned_128head): + /* for the 2nd + iteration of this loop. 
*/ + lxvd2x 6,0,src + lxvd2x 7,src,6 +L(aligned_128loop): + lxvd2x 8,src,7 + lxvd2x 9,src,8 + stxvd2x 6,0,dst + addi src,src,64 + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + lxvd2x 6,0,src + lxvd2x 7,src,6 + addi dst,dst,64 + lxvd2x 8,src,7 + lxvd2x 9,src,8 + addi src,src,64 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + addi dst,dst,64 + bdnz L(aligned_128head) + +L(aligned_tail): + mtocrf 0x01,cnt + bf 25,32f + lxvd2x 6,0,src + lxvd2x 7,src,6 + lxvd2x 8,src,7 + lxvd2x 9,src,8 + addi src,src,64 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + stxvd2x 8,dst,7 + stxvd2x 9,dst,8 + addi dst,dst,64 +32: + bf 26,16f + lxvd2x 6,0,src + lxvd2x 7,src,6 + addi src,src,32 + stxvd2x 6,0,dst + stxvd2x 7,dst,6 + addi dst,dst,32 +16: + bf 27,8f + lxvd2x 6,0,src + addi src,src,16 + stxvd2x 6,0,dst + addi dst,dst,16 +8: + bf 28,4f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. */ + blr + + +/* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + mr dst,3 + cmpldi cr6,cnt,8 + mtocrf 0x01,cnt + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + andi. 0,8,3 + cmpldi cr1,cnt,16 + beq L(copy_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf cnt,0,cnt +2: + bf 30,1f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +1: + bf 31,L(end_4bytes_alignment) + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,cnt,16 + mtocrf 0x01,cnt + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(src) + lwz 7,4(src) + stw 6,0(dst) + lwz 8,8(src) + stw 7,4(dst) + lwz 6,12(src) + addi src,src,16 + stw 8,8(dst) + stw 6,12(dst) + addi dst,dst,16 +8: /* Copy 8 bytes. 
*/ + bf 28,L(tail4) + lwz 6,0(src) + lwz 7,4(src) + addi src,src,8 + stw 6,0(dst) + stw 7,4(dst) + addi dst,dst,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz 6,0(src) + sth 6,0(dst) + bflr 31 + lbz 7,2(src) + stb 7,2(dst) + blr + + .align 4 +L(tail5): + bflr 31 + lbz 6,4(src) + stb 6,4(dst) + blr + + .align 4 +1: + bflr 31 + lbz 6,0(src) + stb 6,0(dst) + /* Return original DST pointer. */ + blr + + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(src) + lwz 7,4(src) + stw 6,0(dst) + stw 7,4(dst) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */ + srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtocrf 0x01,0 + subf cnt,0,cnt + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz 6,0(src) + addi src,src,1 + stb 6,0(dst) + addi dst,dst,1 +2: + bf 30,4f + lhz 6,0(src) + addi src,src,2 + sth 6,0(dst) + addi dst,dst,2 +4: + bf 29,8f + lwz 6,0(src) + addi src,src,4 + stw 6,0(dst) + addi dst,dst,4 +8: + bf 28,0f + ld 6,0(src) + addi src,src,8 + std 6,0(dst) + addi dst,dst,8 +0: + srdi 9,cnt,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. 
*/ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 10,cnt,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi 8,cnt,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,src +#else + lvsl 5,0,src +#endif + lvx 3,0,src + li 0,0 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx 4,src,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi src,src,16 + stvx 6,0,dst + addi dst,dst,16 + vor 3,4,4 + clrrdi 0,src,60 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,src,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,src,7 +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi src,src,32 + stvx 6,0,dst + stvx 10,dst,6 + addi dst,dst,32 + bdnz L(unaligned_loop) + + clrrdi 0,src,60 + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + mtocrf 0x01,cnt + beqlr cr1 + + add src,src,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz 6,0(src) + lwz 7,4(src) + addi src,src,8 + stw 6,0(dst) + stw 7,4(dst) + addi dst,dst,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(src) + stw 6,0(dst) + bf 30,L(tail5) + lhz 7,4(src) + sth 7,4(dst) + bflr 31 + lbz 8,6(src) + stb 8,6(dst) + /* Return original DST pointer. 
*/ + blr + +END_GEN_TB (MEMCPY,TB_TOCLESS) +libc_hidden_builtin_def (memcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S new file mode 100644 index 0000000000..4c0f7c3571 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memmove.S @@ -0,0 +1,835 @@ +/* Optimized memmove implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* void* [r3] memmove (void *dest [r3], const void *src [r4], size_t len [r5]) + + This optimization check if memory 'dest' overlaps with 'src'. If it does + not then it calls an optimized memcpy call (similar to memcpy for POWER7, + embedded here to gain some cycles). + If source and destiny overlaps, a optimized backwards memcpy is used + instead. */ + +#ifndef MEMMOVE +# define MEMMOVE memmove +#endif + .machine power7 +EALIGN (MEMMOVE, 5, 0) + CALL_MCOUNT 3 + +L(_memmove): + subf r9,r4,r3 + cmpld cr7,r9,r5 + blt cr7,L(memmove_bwd) + + cmpldi cr1,r5,31 + neg 0,3 + ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 10,3,15 + clrldi 11,4,60 + cmpld cr6,10,11 /* SRC and DST alignments match? 
*/ + + mr r11,3 + bne cr6,L(copy_GE_32_unaligned) + beq L(aligned_copy) + + mtocrf 0x01,0 + clrldi 0,0,60 + +/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */ +1: + bf 31,2f + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 +2: + bf 30,4f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +4: + bf 29,8f + lwz 6,0(r4) + addi r4,r4,4 + stw 6,0(r11) + addi r11,r11,4 +8: + bf 28,16f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +16: + subf r5,0,r5 + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy): + li 6,16 + li 7,32 + li 8,48 + mtocrf 0x02,r5 + srdi 12,r5,7 + cmpdi 12,0 + beq L(aligned_tail) + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + mtctr 12 + b L(aligned_128loop) + + .align 4 +L(aligned_128head): + /* for the 2nd + iteration of this loop. */ + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 +L(aligned_128loop): + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + stxvd2x 6,0,r11 + addi r4,r4,64 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r11,r11,64 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 + bdnz L(aligned_128head) + +L(aligned_tail): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + lxvd2x 8,r4,7 + lxvd2x 9,r4,8 + addi r4,r4,64 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + stxvd2x 8,r11,7 + stxvd2x 9,r11,8 + addi r11,r11,64 +32: + bf 26,16f + lxvd2x 6,0,r4 + lxvd2x 7,r4,6 + addi r4,r4,32 + stxvd2x 6,0,r11 + stxvd2x 7,r11,6 + addi r11,r11,32 +16: + bf 27,8f + lxvd2x 6,0,r4 + addi r4,r4,16 + stxvd2x 6,0,r11 + addi r11,r11,16 +8: + bf 28,4f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. 
*/ + .align 4 +L(copy_LT_32): + mr r11,3 + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + andi. 0,8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment) + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + lwz 8,8(r4) + stw 7,4(r11) + lwz 6,12(r4) + addi r4,r4,16 + stw 8,8(r11) + stw 6,12(r11) + addi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz 6,0(r4) + sth 6,0(r11) + bflr 31 + lbz 7,2(r4) + stb 7,2(r11) + blr + + .align 4 +L(tail5): + bflr 31 + lbz 6,4(r4) + stb 6,4(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,0(r4) + stb 6,0(r11) + /* Return original DST pointer. */ + blr + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(r4) + lwz 7,4(r4) + stw 6,0(r11) + stw 7,4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st r11 quadword. 
*/ + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* DST is not quadword aligned, get it aligned. */ + + mtocrf 0x01,0 + subf r5,0,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz 6,0(r4) + addi r4,r4,1 + stb 6,0(r11) + addi r11,r11,1 +2: + bf 30,4f + lhz 6,0(r4) + addi r4,r4,2 + sth 6,0(r11) + addi r11,r11,2 +4: + bf 29,8f + lwz 6,0(r4) + addi r4,r4,4 + stw 6,0(r11) + addi r11,r11,4 +8: + bf 28,0f + ld 6,0(r4) + addi r4,r4,8 + std 6,0(r11) + addi r11,r11,8 +0: + srdi 9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 10,r5,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi 8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,r4 +#else + lvsl 5,0,r4 +#endif + lvx 3,0,r4 + li 0,0 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi r4,r4,16 + stvx 6,0,r11 + addi r11,r11,16 + vor 3,4,4 + clrrdi 0,r4,60 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. 
*/ + + lvx 4,r4,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,r4,7 +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi r4,r4,32 + stvx 6,0,r11 + stvx 10,r11,6 + addi r11,r11,32 + bdnz L(unaligned_loop) + + clrrdi 0,r4,60 + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz 6,0(r4) + lwz 7,4(r4) + addi r4,r4,8 + stw 6,0(r11) + stw 7,4(r11) + addi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2) + lwz 6,0(r4) + stw 6,0(r11) + bf 30,L(tail5) + lhz 7,4(r4) + sth 7,4(r11) + bflr 31 + lbz 8,6(r4) + stb 8,6(r11) + /* Return original DST pointer. */ + blr + + /* Start to memcpy backward implementation: the algorith first check if + src and dest have the same alignment and if it does align both to 16 + bytes and copy using VSX instructions. + If does not, align dest to 16 bytes and use VMX (altivec) instruction + to read two 16 bytes at time, shift/permute the bytes read and write + aligned to dest. */ +L(memmove_bwd): + cmpldi cr1,r5,31 + /* Copy is done backwards: update the pointers and check alignment. */ + add r11,r3,r5 + add r4,r4,r5 + mr r0,r11 + ble cr1, L(copy_LT_32_bwd) /* If move < 32 bytes use short move + code. */ + + andi. r10,r11,15 /* Check if r11 is aligned to 16 bytes */ + clrldi r9,r4,60 /* Check if r4 is aligned to 16 bytes */ + cmpld cr6,r10,r9 /* SRC and DST alignments match? */ + + bne cr6,L(copy_GE_32_unaligned_bwd) + beq L(aligned_copy_bwd) + + mtocrf 0x01,r0 + clrldi r0,r0,60 + +/* Get the DST and SRC aligned to 16 bytes. 
*/ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,16f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +16: + subf r5,0,r5 + +/* Main aligned copy loop. Copies 128 bytes at a time. */ +L(aligned_copy_bwd): + li r6,-16 + li r7,-32 + li r8,-48 + li r9,-64 + mtocrf 0x02,r5 + srdi r12,r5,7 + cmpdi r12,0 + beq L(aligned_tail_bwd) + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + mtctr 12 + b L(aligned_128loop_bwd) + + .align 4 +L(aligned_128head_bwd): + /* for the 2nd + iteration of this loop. */ + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 +L(aligned_128loop_bwd): + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + stxvd2x v6,r11,r6 + subi r4,r4,64 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + lxvd2x v6,r4,r6 + lxvd2x v7,r4,7 + subi r11,r11,64 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 + bdnz L(aligned_128head_bwd) + +L(aligned_tail_bwd): + mtocrf 0x01,r5 + bf 25,32f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + lxvd2x v8,r4,r8 + lxvd2x v9,r4,r9 + subi r4,r4,64 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + stxvd2x v8,r11,r8 + stxvd2x v9,r11,r9 + subi r11,r11,64 +32: + bf 26,16f + lxvd2x v6,r4,r6 + lxvd2x v7,r4,r7 + subi r4,r4,32 + stxvd2x v6,r11,r6 + stxvd2x v7,r11,r7 + subi r11,r11,32 +16: + bf 27,8f + lxvd2x v6,r4,r6 + subi r4,r4,16 + stxvd2x v6,r11,r6 + subi r11,r11,16 +8: + bf 28,4f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +4: /* Copies 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr + +/* Handle copies of 0~31 bytes. 
*/ + .align 4 +L(copy_LT_32_bwd): + cmpldi cr6,r5,8 + mtocrf 0x01,r5 + ble cr6,L(copy_LE_8_bwd) + + /* At least 9 bytes to go. */ + neg r8,r4 + andi. r0,r8,3 + cmpldi cr1,r5,16 + beq L(copy_LT_32_aligned_bwd) + + /* Force 4-byte alignment for SRC. */ + mtocrf 0x01,0 + subf r5,0,r5 +2: + bf 30,1f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +1: + bf 31,L(end_4bytes_alignment_bwd) + lbz 6,-1(r4) + subi r4,r4,1 + stb 6,-1(r11) + subi r11,r11,1 + + .align 4 +L(end_4bytes_alignment_bwd): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(copy_LT_32_aligned_bwd): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz r6,-4(r4) + lwz r7,-8(r4) + stw r6,-4(r11) + lwz r8,-12(r4) + stw r7,-8(r11) + lwz r6,-16(r4) + subi r4,r4,16 + stw r8,-12(r11) + stw r6,-16(r11) + subi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4_bwd) + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4_bwd): + bf 29,L(tail2_bwd) + lwz 6,-4(r4) + stw 6,-4(r11) + bf 30,L(tail5_bwd) + lhz 7,-6(r4) + sth 7,-6(r11) + bflr 31 + lbz 8,-7(r4) + stb 8,-7(r11) + /* Return original DST pointer. */ + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2_bwd): + bf 30,1f + lhz 6,-2(r4) + sth 6,-2(r11) + bflr 31 + lbz 7,-3(r4) + stb 7,-3(r11) + blr + + .align 4 +L(tail5_bwd): + bflr 31 + lbz 6,-5(r4) + stb 6,-5(r11) + blr + + .align 4 +1: + bflr 31 + lbz 6,-1(r4) + stb 6,-1(r11) + /* Return original DST pointer. */ + blr + + +/* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8_bwd): + bne cr6,L(tail4_bwd) + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + lwz 6,-8(r4) + lwz 7,-4(r4) + stw 6,-8(r11) + stw 7,-4(r11) + blr + + +/* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. 
*/ + .align 4 +L(copy_GE_32_unaligned_bwd): + andi. r10,r11,15 /* Check alignment of DST against 16 bytes.. */ + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont_bwd) + + /* DST is not quadword aligned and r10 holds the address masked to + compare alignments. */ + mtocrf 0x01,r10 + subf r5,r10,r5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: + bf 31,2f + lbz r6,-1(r4) + subi r4,r4,1 + stb r6,-1(r11) + subi r11,r11,1 +2: + bf 30,4f + lhz r6,-2(r4) + subi r4,r4,2 + sth r6,-2(r11) + subi r11,r11,2 +4: + bf 29,8f + lwz r6,-4(r4) + subi r4,r4,4 + stw r6,-4(r11) + subi r11,r11,4 +8: + bf 28,0f + ld r6,-8(r4) + subi r4,r4,8 + std r6,-8(r11) + subi r11,r11,8 +0: + srdi r9,r5,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont_bwd): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi r10,r5,60 + li r6,-16 /* Index for 16-bytes offsets. */ + li r7,-32 /* Index for 32-bytes offsets. */ + cmpldi cr1,10,0 + srdi r8,r5,5 /* Setup the loop counter. */ + mtocrf 0x01,9 + cmpldi cr6,r9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr v5,r0,r4 +#else + lvsl v5,r0,r4 +#endif + lvx v3,0,r4 + li r0,0 + bf 31,L(setup_unaligned_loop_bwd) + + /* Copy another 16 bytes to align to 32-bytes due to the loop. */ + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + subi r4,r4,16 + stvx v6,r11,r6 + subi r11,r11,16 + vor v3,v4,v4 + clrrdi r0,r4,60 + +L(setup_unaligned_loop_bwd): + mtctr r8 + ble cr6,L(end_unaligned_loop_bwd) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop_bwd): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. 
This is faster than having unaligned + vector instructions though. */ + + lvx v4,r4,r6 +#ifdef __LITTLE_ENDIAN__ + vperm v6,v3,v4,v5 +#else + vperm v6,v4,v3,v5 +#endif + lvx v3,r4,r7 +#ifdef __LITTLE_ENDIAN__ + vperm v10,v4,v3,v5 +#else + vperm v10,v3,v4,v5 +#endif + subi r4,r4,32 + stvx v6,r11,r6 + stvx v10,r11,r7 + subi r11,r11,32 + bdnz L(unaligned_loop_bwd) + + clrrdi r0,r4,60 + + .align 4 +L(end_unaligned_loop_bwd): + + /* Check for tail bytes. */ + mtocrf 0x01,r5 + beqlr cr1 + + add r4,r4,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ + /* Copy 8 bytes. */ + bf 28,4f + lwz r6,-4(r4) + lwz r7,-8(r4) + subi r4,r4,8 + stw r6,-4(r11) + stw r7,-8(r11) + subi r11,r11,8 +4: /* Copy 4~7 bytes. */ + bf 29,L(tail2_bwd) + lwz r6,-4(r4) + stw r6,-4(r11) + bf 30,L(tail5_bwd) + lhz r7,-6(r4) + sth r7,-6(r11) + bflr 31 + lbz r8,-7(r4) + stb r8,-7(r11) + /* Return original DST pointer. */ + blr +END_GEN_TB (MEMMOVE, TB_TOCLESS) +libc_hidden_builtin_def (memmove) + + +/* void bcopy(const void *src [r3], void *dest [r4], size_t n [r5]) + Implemented in this file to avoid linker create a stub function call + in the branch to '_memmove'. */ +ENTRY (__bcopy) + mr r6,r3 + mr r3,r4 + mr r4,r6 + b L(_memmove) +END (__bcopy) +weak_alias (__bcopy, bcopy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S new file mode 100644 index 0000000000..4e15d1e40c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/mempcpy.S @@ -0,0 +1,472 @@ +/* Optimized mempcpy implementation for POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); + Returns 'dst' + 'len'. */ + +#ifndef MEMPCPY +# define MEMPCPY __mempcpy +#endif + .machine power7 +EALIGN (MEMPCPY, 5, 0) + CALL_MCOUNT 3 + + cmpldi cr1,5,31 + neg 0,3 + std 3,-16(1) + std 31,-8(1) + cfi_offset(31,-8) + ble cr1,L(copy_LT_32) /* If move < 32 bytes use short move + code. */ + + andi. 11,3,7 /* Check alignment of DST. */ + + + clrldi 10,4,61 /* Check alignment of SRC. */ + cmpld cr6,10,11 /* SRC and DST alignments match? */ + mr 12,4 + mr 31,5 + bne cr6,L(copy_GE_32_unaligned) + + srdi 9,5,3 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_aligned_cont) + + clrldi 0,0,61 + mtcrf 0x01,0 + subf 31,0,5 + + /* Get the SRC aligned to 8 bytes. */ + +1: bf 31,2f + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: bf 30,4f + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: bf 29,0f + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +0: + clrldi 10,12,61 /* Check alignment of SRC again. */ + srdi 9,31,3 /* Number of full doublewords remaining. */ + +L(copy_GE_32_aligned_cont): + + clrldi 11,31,61 + mtcrf 0x01,9 + + srdi 8,31,5 + cmpldi cr1,9,4 + cmpldi cr6,11,0 + mr 11,12 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. 
*/ + + bf 30,1f + ld 6,0(12) + ld 7,8(12) + addi 11,12,16 + mtctr 8 + std 6,0(3) + std 7,8(3) + addi 10,3,16 + bf 31,4f + ld 0,16(12) + std 0,16(3) + blt cr1,3f + addi 11,12,24 + addi 10,3,24 + b 4f + + .align 4 +1: /* Copy 1 doubleword and set the counter. */ + mr 10,3 + mtctr 8 + bf 31,4f + ld 6,0(12) + addi 11,12,8 + std 6,0(3) + addi 10,3,8 + + /* Main aligned copy loop. Copies 32-bytes at a time. */ + .align 4 +4: + ld 6,0(11) + ld 7,8(11) + ld 8,16(11) + ld 0,24(11) + addi 11,11,32 + + std 6,0(10) + std 7,8(10) + std 8,16(10) + std 0,24(10) + addi 10,10,32 + bdnz 4b +3: + + /* Check for tail bytes. */ + rldicr 0,31,0,60 + mtcrf 0x01,31 + beq cr6,0f + +.L9: + add 3,3,0 + add 12,12,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 31,-8(1) + ld 3,-16(1) + add 3,3,5 + blr + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + cmpldi cr6,5,8 + mr 12,4 + mtcrf 0x01,5 + ble cr6,L(copy_LE_8) + + /* At least 9 bytes to go. */ + neg 8,4 + clrrdi 11,4,2 + andi. 0,8,3 + cmpldi cr1,5,16 + mr 10,5 + beq L(copy_LT_32_aligned) + + /* Force 4-bytes alignment for SRC. */ + mtocrf 0x01,0 + subf 10,0,5 +2: bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: bf 31,L(end_4bytes_alignment) + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,10,16 + mtcrf 0x01,10 + +L(copy_LT_32_aligned): + /* At least 6 bytes to go, and SRC is word-aligned. */ + blt cr1,8f + + /* Copy 16 bytes. */ + lwz 6,0(12) + lwz 7,4(12) + stw 6,0(3) + lwz 8,8(12) + stw 7,4(3) + lwz 6,12(12) + addi 12,12,16 + stw 8,8(3) + stw 6,12(3) + addi 3,3,16 +8: /* Copy 8 bytes. 
*/ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + lhz 6,0(12) + sth 6,0(3) + bf 31,0f + lbz 7,2(12) + stb 7,2(3) + ld 3,-16(1) + add 3,3,5 + blr + + .align 4 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 3,-16(1) + add 3,3,5 + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(copy_LE_8): + bne cr6,4f + + /* Though we could've used ld/std here, they are still + slow for unaligned cases. */ + + lwz 6,0(4) + lwz 7,4(4) + stw 6,0(3) + stw 7,4(3) + ld 3,-16(1) /* Return DST + LEN pointer. */ + add 3,3,5 + blr + + .align 4 +4: /* Copies 4~7 bytes. */ + bf 29,2b + + lwz 6,0(4) + stw 6,0(3) + bf 30,5f + lhz 7,4(4) + sth 7,4(3) + bf 31,0f + lbz 8,6(4) + stb 8,6(3) + ld 3,-16(1) + add 3,3,5 + blr + + .align 4 +5: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,4(4) + stb 6,4(3) + +0: /* Return DST + LEN pointer. */ + ld 3,-16(1) + add 3,3,5 + blr + + /* Handle copies of 32+ bytes where DST is aligned (to quadword) but + SRC is not. Use aligned quadword loads from SRC, shifted to realign + the data, allowing for aligned DST stores. */ + .align 4 +L(copy_GE_32_unaligned): + clrldi 0,0,60 /* Number of bytes until the 1st + quadword. */ + andi. 11,3,15 /* Check alignment of DST (against + quadwords). */ + srdi 9,5,4 /* Number of full quadwords remaining. */ + + beq L(copy_GE_32_unaligned_cont) + + /* SRC is not quadword aligned, get it aligned. */ + + mtcrf 0x01,0 + subf 31,0,5 + + /* Vector instructions work best when proper alignment (16-bytes) + is present. Move 0~15 bytes as needed to get DST quadword-aligned. */ +1: /* Copy 1 byte. */ + bf 31,2f + + lbz 6,0(12) + addi 12,12,1 + stb 6,0(3) + addi 3,3,1 +2: /* Copy 2 bytes. */ + bf 30,4f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +4: /* Copy 4 bytes. 
*/ + bf 29,8f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +8: /* Copy 8 bytes. */ + bf 28,0f + + ld 6,0(12) + addi 12,12,8 + std 6,0(3) + addi 3,3,8 +0: + clrldi 10,12,60 /* Check alignment of SRC. */ + srdi 9,31,4 /* Number of full quadwords remaining. */ + + /* The proper alignment is present, it is OK to copy the bytes now. */ +L(copy_GE_32_unaligned_cont): + + /* Setup two indexes to speed up the indexed vector operations. */ + clrldi 11,31,60 + li 6,16 /* Index for 16-bytes offsets. */ + li 7,32 /* Index for 32-bytes offsets. */ + cmpldi cr1,11,0 + srdi 8,31,5 /* Setup the loop counter. */ + mr 10,3 + mr 11,12 + mtcrf 0x01,9 + cmpldi cr6,9,1 +#ifdef __LITTLE_ENDIAN__ + lvsr 5,0,12 +#else + lvsl 5,0,12 +#endif + lvx 3,0,12 + bf 31,L(setup_unaligned_loop) + + /* Copy another 16 bytes to align to 32-bytes due to the loop . */ + lvx 4,12,6 +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + addi 11,12,16 + addi 10,3,16 + stvx 6,0,3 + vor 3,4,4 + +L(setup_unaligned_loop): + mtctr 8 + ble cr6,L(end_unaligned_loop) + + /* Copy 32 bytes at a time using vector instructions. */ + .align 4 +L(unaligned_loop): + + /* Note: vr6/vr10 may contain data that was already copied, + but in order to get proper alignment, we may have to copy + some portions again. This is faster than having unaligned + vector instructions though. */ + + lvx 4,11,6 /* vr4 = r11+16. */ +#ifdef __LITTLE_ENDIAN__ + vperm 6,4,3,5 +#else + vperm 6,3,4,5 +#endif + lvx 3,11,7 /* vr3 = r11+32. */ +#ifdef __LITTLE_ENDIAN__ + vperm 10,3,4,5 +#else + vperm 10,4,3,5 +#endif + addi 11,11,32 + stvx 6,0,10 + stvx 10,10,6 + addi 10,10,32 + + bdnz L(unaligned_loop) + + .align 4 +L(end_unaligned_loop): + + /* Check for tail bytes. */ + rldicr 0,31,0,59 + mtcrf 0x01,31 + beq cr1,0f + + add 3,3,0 + add 12,12,0 + + /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */ +8: /* Copy 8 bytes. 
*/ + bf 28,4f + + lwz 6,0(12) + lwz 7,4(12) + addi 12,12,8 + stw 6,0(3) + stw 7,4(3) + addi 3,3,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + lwz 6,0(12) + addi 12,12,4 + stw 6,0(3) + addi 3,3,4 +2: /* Copy 2~3 bytes. */ + bf 30,1f + + lhz 6,0(12) + addi 12,12,2 + sth 6,0(3) + addi 3,3,2 +1: /* Copy 1 byte. */ + bf 31,0f + + lbz 6,0(12) + stb 6,0(3) +0: /* Return DST + LEN pointer. */ + ld 31,-8(1) + ld 3,-16(1) + add 3,3,5 + blr + +END_GEN_TB (MEMPCPY,TB_TOCLESS) +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S new file mode 100644 index 0000000000..4276768915 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memrchr.S @@ -0,0 +1,201 @@ +/* Optimized memrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ + +#ifndef MEMRCHR +# define MEMRCHR __memrchr +#endif + .machine power7 +ENTRY (MEMRCHR) + CALL_MCOUNT 3 + add r7,r3,r5 /* Calculate the last acceptable address. 
*/ + neg r0,r7 + addi r7,r7,-1 + mr r10,r3 + clrrdi r6,r7,7 + li r9,3<<5 + dcbt r9,r6,8 /* Stream hint, decreasing addresses. */ + + /* Replicate BYTE to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + li r6,-8 + li r9,-1 + rlwinm r0,r0,3,26,28 /* Calculate padding. */ + clrrdi r8,r7,3 + srd r9,r9,r0 + cmpldi r5,32 + clrrdi r0,r10,3 + ble L(small_range) + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + bf 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ +#ifdef __LITTLE_ENDIAN__ + ldx r12,r8,r6 +#else + ldbrx r12,r8,r6 +#endif + addi r8,r8,-8 + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the first byte of the string, ie. the dword at + s & ~7, or r0. The first dword read is at r8 - 8, we + read 2 * cnt dwords, so the last dword read will be at + r8 - 8 - 16 * cnt + 8. Solving for cnt gives + cnt = (r8 - r0) / 16 */ + sub r5,r8,r0 + addi r8,r8,-8 + srdi r9,r5,4 /* Number of loop iterations. */ + mtctr r9 /* Setup the counter. */ + + /* Main loop to look for BYTE backwards in the string. + FIXME: Investigate whether 32 byte align helps with this + 9 instruction loop. */ + .align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 + ldx r11,r8,r6 +#else + ldbrx r12,0,r8 + ldbrx r11,r8,r6 +#endif + cmpb r3,r12,r4 + cmpb r9,r11,r4 + or r5,r9,r3 /* Merge everything in one doubleword. 
*/ + cmpldi cr7,r5,0 + bne cr7,L(found) + addi r8,r8,-16 + bdnz L(loop) + + /* We may have one more word to read. */ + cmpld r8,r0 + bnelr + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmpldi cr7,r3,0 + bne cr7,L(done) + blr + + .align 4 +L(found): + /* OK, one (or both) of the dwords contains BYTE. Check + the first dword. */ + cmpldi cr6,r3,0 + bne cr6,L(done) + + /* BYTE must be in the second word. Adjust the address + again and move the result of cmpb to r3 so we can calculate the + pointer. */ + + mr r3,r9 + addi r8,r8,-8 + + /* r3 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as BYTE in the original + word from the string. Use that to calculate the pointer. + We need to make sure BYTE is *before* the end of the + range. */ +L(done): + cntlzd r9,r3 /* Count leading zeros before the match. */ + cmpld r8,r0 /* Are we on the last word? */ + srdi r6,r9,3 /* Convert leading zeros to bytes. */ + addi r0,r6,-7 + sub r3,r8,r0 + cmpld cr7,r3,r10 + bnelr + bgelr cr7 + li r3,0 + blr + + .align 4 +L(null): + li r3,0 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r5,0 + beq L(null) + +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 /* Load reversed doubleword from memory. */ +#endif + cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */ + and r3,r3,r9 + cmpldi cr7,r3,0 + bne cr7,L(done) + + /* Are we done already? 
*/ + cmpld r8,r0 + addi r8,r8,-8 + beqlr + + .align 5 +L(loop_small): +#ifdef __LITTLE_ENDIAN__ + ldx r12,0,r8 +#else + ldbrx r12,0,r8 +#endif + cmpb r3,r12,r4 + cmpld r8,r0 + cmpldi cr7,r3,0 + bne cr7,L(done) + addi r8,r8,-8 + bne L(loop_small) + blr + +END (MEMRCHR) +weak_alias (__memrchr, memrchr) +libc_hidden_builtin_def (memrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S new file mode 100644 index 0000000000..21933c0672 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/memset.S @@ -0,0 +1,399 @@ +/* Optimized memset implementation for PowerPC64/POWER7. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + .machine power7 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +L(_memset): + cmpldi cr7,5,31 + cmpldi cr6,5,8 + mr 10,3 + + /* Replicate byte to word. */ + insrdi 4,4,8,48 + insrdi 4,4,16,32 + ble cr6,L(small) /* If length <= 8, use short copy code. */ + + neg 0,3 + ble cr7,L(medium) /* If length < 32, use medium copy code. */ + + andi. 
11,10,7 /* Check alignment of SRC. */ + insrdi 4,4,32,0 /* Replicate word to double word. */ + + mr 12,5 + beq L(big_aligned) + + clrldi 0,0,61 + mtocrf 0x01,0 + subf 5,0,5 + + /* Get DST aligned to 8 bytes. */ +1: bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: bf 30,4f + + sth 4,0(10) + addi 10,10,2 +4: bf 29,L(big_aligned) + + stw 4,0(10) + addi 10,10,4 + + .align 4 +L(big_aligned): + + cmpldi cr5,5,255 + li 0,32 + dcbtst 0,10 + cmpldi cr6,4,0 + srdi 9,5,3 /* Number of full doublewords remaining. */ + crand 27,26,21 + mtocrf 0x01,9 + bt 27,L(huge) + + /* From this point on, we'll copy 32+ bytes and the value + isn't 0 (so we can't use dcbz). */ + + srdi 8,5,5 + clrldi 11,5,61 + cmpldi cr6,11,0 + cmpldi cr1,9,4 + mtctr 8 + + /* Copy 1~3 doublewords so the main loop starts + at a multiple of 32 bytes. */ + + bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + bf 31,L(big_loop) + + std 4,0(10) + addi 10,10,8 + mr 12,10 + blt cr1,L(tail_bytes) + b L(big_loop) + + .align 4 +1: /* Copy 1 doubleword. */ + bf 31,L(big_loop) + + std 4,0(10) + addi 10,10,8 + + /* Main aligned copy loop. Copies 32-bytes at a time and + ping-pong through r10 and r12 to avoid AGEN delays. */ + .align 4 +L(big_loop): + addi 12,10,32 + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + bdz L(tail_bytes) + + addi 10,10,64 + std 4,0(12) + std 4,8(12) + std 4,16(12) + std 4,24(12) + bdnz L(big_loop) + + mr 12,10 + b L(tail_bytes) + + .align 4 +L(tail_bytes): + + /* Check for tail bytes. */ + beqlr cr6 + + clrldi 0,5,61 + mtocrf 0x01,0 + + /* At this point we have a tail of 0-7 bytes and we know that the + destination is doubleword-aligned. */ +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(12) + addi 12,12,4 +2: /* Copy 2 bytes. */ + bf 30,1f + + sth 4,0(12) + addi 12,12,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(12) + blr + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out 128-bytes at a time. 
Before using + dcbz though, we need to get the destination 128-bytes aligned. */ + .align 4 +L(huge): + andi. 11,10,127 + neg 0,10 + beq L(huge_aligned) + + clrldi 0,0,57 + subf 5,0,5 + srdi 0,0,3 + mtocrf 0x01,0 + + /* Get DST aligned to 128 bytes. */ +8: bf 28,4f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + std 4,32(10) + std 4,40(10) + std 4,48(10) + std 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(huge_aligned) + + std 4,0(10) + addi 10,10,8 + + +L(huge_aligned): + srdi 8,5,7 + clrldi 11,5,57 + cmpldi cr6,11,0 + mtctr 8 + + .align 4 +L(huge_loop): + dcbz 0,10 + addi 10,10,128 + bdnz L(huge_loop) + + /* Check how many bytes are still left. */ + beqlr cr6 + + subf 9,3,10 + subf 5,9,12 + srdi 8,5,3 + cmpldi cr6,8,0 + mtocrf 0x01,8 + + /* We have a tail o 1~127 bytes. Copy up to 15 doublewords for + speed. We'll handle the resulting tail bytes later. */ + beq cr6,L(tail) + +8: bf 28,4f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + std 4,32(10) + std 4,40(10) + std 4,48(10) + std 4,56(10) + addi 10,10,64 + .align 4 +4: bf 29,2f + + std 4,0(10) + std 4,8(10) + std 4,16(10) + std 4,24(10) + addi 10,10,32 + .align 4 +2: bf 30,1f + + std 4,0(10) + std 4,8(10) + addi 10,10,16 + .align 4 +1: bf 31,L(tail) + + std 4,0(10) + addi 10,10,8 + + /* Handle the rest of the tail bytes here. */ +L(tail): + mtocrf 0x01,5 + + .align 4 +4: bf 29,2f + + stw 4,0(10) + addi 10,10,4 + .align 4 +2: bf 30,1f + + sth 4,0(10) + addi 10,10,2 + .align 4 +1: bflr 31 + + stb 4,0(10) + blr + + /* Expanded tree to copy tail bytes without increments. 
*/ + .align 4 +L(copy_tail): + bf 29,L(FXX) + + stw 4,0(10) + bf 30,L(TFX) + + sth 4,4(10) + bflr 31 + + stb 4,6(10) + blr + + .align 4 +L(FXX): bf 30,L(FFX) + + sth 4,0(10) + bflr 31 + + stb 4,2(10) + blr + + .align 4 +L(TFX): bflr 31 + + stb 4,4(10) + blr + + .align 4 +L(FFX): bflr 31 + + stb 4,0(10) + blr + + /* Handle copies of 9~31 bytes. */ + .align 4 +L(medium): + /* At least 9 bytes to go. */ + andi. 11,10,3 + clrldi 0,0,62 + beq L(medium_aligned) + + /* Force 4-bytes alignment for DST. */ + mtocrf 0x01,0 + subf 5,0,5 +1: /* Copy 1 byte. */ + bf 31,2f + + stb 4,0(10) + addi 10,10,1 +2: /* Copy 2 bytes. */ + bf 30,L(medium_aligned) + + sth 4,0(10) + addi 10,10,2 + + .align 4 +L(medium_aligned): + /* At least 6 bytes to go, and DST is word-aligned. */ + cmpldi cr1,5,16 + mtocrf 0x01,5 + blt cr1,8f + + /* Copy 16 bytes. */ + stw 4,0(10) + stw 4,4(10) + stw 4,8(10) + stw 4,12(10) + addi 10,10,16 +8: /* Copy 8 bytes. */ + bf 28,4f + + stw 4,0(10) + stw 4,4(10) + addi 10,10,8 +4: /* Copy 4 bytes. */ + bf 29,2f + + stw 4,0(10) + addi 10,10,4 +2: /* Copy 2-3 bytes. */ + bf 30,1f + + sth 4,0(10) + addi 10,10,2 +1: /* Copy 1 byte. */ + bflr 31 + + stb 4,0(10) + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(small): + mtocrf 0x01,5 + bne cr6,L(copy_tail) + + stw 4,0(10) + stw 4,4(10) + blr + +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. 
*/ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies new file mode 100644 index 0000000000..bf5d6171a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power6/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S new file mode 100644 index 0000000000..48afb75943 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/rawmemchr.S @@ -0,0 +1,115 @@ +/* Optimized rawmemchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] rawmemchr (void *s [r3], int c [r4]) */ + +#ifndef RAWMEMCHR +# define RAWMEMCHR __rawmemchr +#endif + .machine power7 +ENTRY (RAWMEMCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. 
*/ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r5,r12,r4 /* Compare each byte against c byte. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 /* Move left to discard ignored bits. */ + srd r5,r5,r6 /* Bring the bits back as zeros. */ +#endif + cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r4 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the byte-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r4 + cmpb r6,r11,r4 + or r7,r5,r6 + cmpdi cr7,r7,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a 'c' byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The 'c' byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + mr r5,r6 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the 'c' byte in the original + doubleword from the string. Use that fact to find out what is + the position of the byte inside the string. 
*/ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 /* Count trailing zeros. */ +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching char. */ + blr +END (RAWMEMCHR) +weak_alias (__rawmemchr,rawmemchr) +libc_hidden_builtin_def (__rawmemchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S new file mode 100644 index 0000000000..a346dd7e28 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/stpncpy.S @@ -0,0 +1,24 @@ +/* Optimized stpncpy implementation for PowerPC64/POWER7. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STPNCPY +#include <sysdeps/powerpc/powerpc64/power7/strncpy.S> + +weak_alias (__stpncpy, stpncpy) +libc_hidden_def (__stpncpy) +libc_hidden_builtin_def (stpncpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S new file mode 100644 index 0000000000..e856b8a593 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -0,0 +1,126 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) + + or if defined USE_IN_EXTENDED_LOCALE_MODEL: + + int [r3] strcasecmp_l (const char *s1 [r3], const char *s2 [r4], + __locale_t loc [r5]) */ + +#ifndef STRCMP +# define __STRCMP __strcasecmp +# define STRCMP strcasecmp +#endif + +ENTRY (__STRCMP) +#ifndef USE_IN_EXTENDED_LOCALE_MODEL + CALL_MCOUNT 2 +#else + CALL_MCOUNT 3 +#endif + +#define rRTN r3 /* Return value */ +#define rSTR1 r5 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rLOCARG r5 /* 3rd argument: locale_t */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 +#define rLOC r11 /* Default locale address */ + + cmpd cr7, r3, r4 +#ifndef USE_IN_EXTENDED_LOCALE_MODEL + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) +#else + mr rLOC, rLOCARG +#endif + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 + + + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? 
*/ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1,L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ + b L(loop) +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (__STRCMP) + +weak_alias (__STRCMP, STRCMP) +libc_hidden_builtin_def (__STRCMP) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S new file mode 100644 index 0000000000..c13c4ebcb8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcasecmp_l.S @@ -0,0 +1,5 @@ +#define USE_IN_EXTENDED_LOCALE_MODEL +#define STRCMP strcasecmp_l +#define __STRCMP __strcasecmp_l + +#include "strcasecmp.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S new file mode 100644 index 0000000000..a18e2e101c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchr.S @@ -0,0 +1,230 @@ +/* Optimized strchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCHR +# define STRCHR strchr +#endif + +/* int [r3] strchr (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (STRCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. 
*/ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r9,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r12,r10,r11 + or r9,r6,r7 + or r5,r12,r9 + cmpdi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntd r0,r3 + addi r4,r11,-1 + andc r4,r4,r11 + cmpld cr7,r3,r4 + bgt cr7,L(no_match) +#else + cntlzd r0,r10 /* Count leading zeros before c matches. */ + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + .align 4 +L(no_match): + li r3,0 + blr + +/* We are here because strchr was called with a null byte. 
*/ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. 
*/ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +END (STRCHR) +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S new file mode 100644 index 0000000000..27bc1f0682 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strchrnul.S @@ -0,0 +1,131 @@ +/* Optimized strchrnul implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCHRNUL +# define STRCHRNUL __strchrnul +#endif +/* int [r3] strchrnul (char *s [r3], int c [r4]) */ + .machine power7 +ENTRY (STRCHRNUL) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + rlwinm r6,r3,3,26,28 /* Calculate padding. 
*/ + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r0 /* Compare each byte against c byte. */ + cmpb r9,r12,r4 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and to bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r9,r9,r6 + sld r10,r10,r6 + sld r9,r9,r6 +#else + sld r10,r10,r6 + sld r9,r9,r6 + srd r10,r10,r6 + srd r9,r9,r6 +#endif + or r5,r9,r10 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + or r5,r9,r10 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r10,r12,r0 + cmpb r9,r12,r4 + cmpb r6,r11,r0 + cmpb r7,r11,r4 + or r5,r9,r10 + or r10,r6,r7 + or r11,r5,r10 + cmpdi cr7,r11,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r5 so we can calculate + the pointer. 
*/ + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of matching c/null byte. */ + blr +END (STRCHRNUL) +weak_alias (STRCHRNUL, strchrnul) +libc_hidden_builtin_def (STRCHRNUL) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S new file mode 100644 index 0000000000..14e14f457e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strcmp.S @@ -0,0 +1,168 @@ +/* Optimized strcmp implementation for Power7 using 'cmpb' instruction + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* The optimization is achieved here through cmpb instruction. 
+ 8byte aligned strings are processed with double word comparision + and unaligned strings are handled effectively with loop unrolling + technique */ + +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + + .machine power7 +EALIGN (STRCMP, 4, 0) + CALL_MCOUNT 2 + + or r9, r3, r4 + rldicl. r10, r9, 0, 61 /* are s1 and s2 8 byte aligned..? */ + bne cr0, L(process_unaligned_bytes) + li r5, 0 + + .align 4 +/* process input parameters on double word aligned boundary */ +L(unrollDword): + ld r8,0(r3) + ld r10,0(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,8(r3) + ld r10,8(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,16(r3) + ld r10,16(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + ld r8,24(r3) + ld r10,24(r4) + cmpb r7,r8,r5 + cmpdi cr7,r7,0 + mr r9,r7 + bne cr7,L(null_found) + cmpld cr7,r8,r10 + bne cr7,L(different) + + addi r3, r3, 32 + addi r4, r4, 32 + beq cr7, L(unrollDword) + + .align 4 +L(null_found): +#ifdef __LITTLE_ENDIAN__ + neg r7,r9 + and r9,r9,r7 + li r7,-1 + cntlzd r9,r9 + subfic r9,r9,71 + sld r9,r7,r9 +#else + cntlzd r9,r9 + li r7,-1 + addi r9,r9,8 + srd r9,r7,r9 +#endif + or r8,r8,r9 + or r10,r10,r9 + +L(different): + cmpb r9,r8,r10 +#ifdef __LITTLE_ENDIAN__ + addi r7,r9,1 + andc r9,r7,r9 + cntlzd r9,r9 + subfic r9,r9,63 +#else + not r9,r9 + cntlzd r9,r9 + subfic r9,r9,56 +#endif + srd r3,r8,r9 + srd r10,r10,r9 + rldicl r10,r10,0,56 + rldicl r3,r3,0,56 + subf r3,r10,r3 + blr + + .align 4 +L(process_unaligned_bytes): + lbz r9, 0(r3) /* load byte from s1 */ + lbz r10, 0(r4) /* load byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 
and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 1(r3) /* load next byte from s1 */ + lbz r10, 1(r4) /* load next byte from s2 */ + cmpdi cr7, r9, 0 /* compare *s1 with NULL */ + beq cr7, L(diffOfNULL) /* if *s1 is NULL , return *s1 - *s2 */ + cmplw cr7, r9, r10 /* compare *s1 and *s2 */ + bne cr7, L(ComputeDiff) /* branch to compute difference and return */ + + lbz r9, 2(r3) /* unroll 3rd byte here */ + lbz r10, 2(r4) + cmpdi cr7, r9, 0 + beq cr7, L(diffOfNULL) + cmplw cr7, r9, r10 + bne 7, L(ComputeDiff) + + lbz r9, 3(r3) /* unroll 4th byte now */ + lbz r10, 3(r4) + addi r3, r3, 4 /* increment s1 by unroll factor */ + cmpdi cr7, r9, 0 + cmplw cr6, 9, r10 + beq cr7, L(diffOfNULL) + addi r4, r4, 4 /* increment s2 by unroll factor */ + beq cr6, L(process_unaligned_bytes) /* unroll byte processing */ + + .align 4 +L(ComputeDiff): + extsw r9, r9 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 + blr /* return */ + + .align 4 +L(diffOfNULL): + li r9, 0 + subf r10, r10, r9 /* compute s1 - s2 */ + extsw r3, r10 /* sign extend result */ + blr /* return */ + +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S new file mode 100644 index 0000000000..63848c460c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strlen.S @@ -0,0 +1,107 @@ +/* Optimized strlen implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + .machine power7 +ENTRY (STRLEN) + CALL_MCOUNT 1 + dcbt 0,r3 + clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r4) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r4 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r4) + cmpb r10,r12,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12, 8(r4) + ldu r11, 16(r4) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r8,r9,r10 /* Merge everything in one doubleword. */ + cmpdi cr7,r8,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a null byte. 
Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r10,0 + addi r4,r4,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r4,r4,8 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S new file mode 100644 index 0000000000..d53b31be8e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -0,0 +1,227 @@ +/* Optimized strcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* See strlen.s for comments on how the end-of-string testing works. */ + +/* int [r3] strncmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +EALIGN (STRNCMP,5,0) + CALL_MCOUNT 3 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rWORD3 r10 +#define rWORD4 r11 +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + nop + or rTMP,rSTR2,rSTR1 + lis r7F7F,0x7f7f + dcbt 0,rSTR2 + nop + clrldi. rTMP,rTMP,61 + cmpldi cr1,rN,0 + lis rFEFE,-0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. first a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP,rN,3 + clrldi rN,rN,61 + addi rFEFE,rFEFE,-0x101 + addi r7F7F,r7F7F,0x7f7f + cmpldi cr1,rN,0 + beq L(unaligned) + + mtctr rTMP + ld rWORD1,0(rSTR1) + ld rWORD2,0(rSTR2) + sldi rTMP,rFEFE,32 + insrdi r7F7F,r7F7F,32,0 + add rFEFE,rFEFE,rTMP + b L(g1) + +L(g0): + ldu rWORD1,8(rSTR1) + bne cr1,L(different) + ldu rWORD2,8(rSTR2) +L(g1): add rTMP,rFEFE,rWORD1 + nor rNEG,r7F7F,rWORD1 + bdz L(tail) + and. rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + beq L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... 
*/ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ + addi rNEG, rBITDIF, 1 + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ + andc rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */ + addi rNEG, rBITDIF, 1 + orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */ + sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */ + andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */ + andc rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP,r7F7F,rWORD1 + beq cr1,L(equal) + add rTMP,rTMP,r7F7F + xor. rBITDIF,rWORD1,rWORD2 + andc rNEG,rNEG,rTMP + blt L(highbit) + cntlzd rBITDIF,rBITDIF + cntlzd rNEG,rNEG + addi rNEG,rNEG,7 + cmpd cr1,rNEG,rBITDIF + sub rRTN,rWORD1,rWORD2 + blt cr1,L(equal) + sradi rRTN,rRTN,63 /* must return an int. */ + ori rRTN,rRTN,1 + blr +L(equal): + li rRTN,0 + blr + +L(different): + ld rWORD1,-8(rSTR1) + xor. rBITDIF,rWORD1,rWORD2 + sub rRTN,rWORD1,rWORD2 + blt L(highbit) + sradi rRTN,rRTN,63 + ori rRTN,rRTN,1 + blr +L(highbit): + sradi rRTN,rWORD2,63 + ori rRTN,rRTN,1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP,rTMP,rNEG + cmpd cr1,rWORD1,rWORD2 + bne L(endstring) + addi rSTR1,rSTR1,8 + bne cr1,L(different) + addi rSTR2,rSTR2,8 + cmpldi cr1,rN,0 +L(unaligned): + mtctr rN + ble cr1,L(ux) +L(uz): + lbz rWORD1,0(rSTR1) + lbz rWORD2,0(rSTR2) + .align 4 +L(u1): + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + cmpdi cr1,rWORD1,0 + bdz L(u4) + cmpd rWORD1,rWORD2 + beq cr1,L(u4) + bne L(u4) + lbzu rWORD3,1(rSTR1) + lbzu rWORD4,1(rSTR2) + cmpdi cr1,rWORD3,0 + bdz L(u3) + cmpd rWORD3,rWORD4 + beq cr1,L(u3) + bne L(u3) + lbzu rWORD1,1(rSTR1) + lbzu rWORD2,1(rSTR2) + b L(u1) + +L(u3): sub rRTN,rWORD3,rWORD4 + blr +L(u4): sub rRTN,rWORD1,rWORD2 + blr +L(ux): + li rRTN,0 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S new file mode 100644 index 0000000000..0224f74898 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strncpy.S @@ -0,0 +1,722 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* Implements the functions + + char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + AND + + char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5]) + + The algorithm is as follows: + > if src and dest are 8 byte aligned, perform double word copy + else + > copy byte by byte on unaligned addresses. + + The aligned comparison are made using cmpb instructions. */ + +/* The focus on optimization for performance improvements are as follows: + 1. data alignment [gain from aligned memory access on read/write] + 2. POWER7 gains performance with loop unrolling/unwinding + [gain by reduction of branch penalty]. + 3. The final pad with null bytes is done by calling an optimized + memset. */ + +#ifdef USE_AS_STPNCPY +# ifndef STPNCPY +# define FUNC_NAME __stpncpy +# else +# define FUNC_NAME STPNCPY +# endif +#else +# ifndef STRNCPY +# define FUNC_NAME strncpy +# else +# define FUNC_NAME STRNCPY +# endif +#endif /* !USE_AS_STPNCPY */ + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + +#ifndef MEMSET +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define MEMSET __GI_memset +# else +# define MEMSET memset +# endif +#endif + + .machine power7 +EALIGN(FUNC_NAME, 4, 0) + CALL_MCOUNT 3 + + mflr r0 /* load link register LR to r0 */ + or r10, r3, r4 /* to verify source and destination */ + rldicl. r8, r10, 0, 61 /* is double word aligned .. ? 
*/ + + std r19, -8(r1) /* save callers register , r19 */ + std r18, -16(r1) /* save callers register , r18 */ + std r0, 16(r1) /* store the link register */ + stdu r1, -FRAMESIZE(r1) /* create the stack frame */ + + mr r9, r3 /* save r3 into r9 for use */ + mr r18, r3 /* save r3 for retCode of strncpy */ + bne 0, L(unaligned) + +L(aligned): + srdi r11, r5, 3 /* compute count for CTR ; count = n/8 */ + cmpldi cr7, r11, 3 /* if count > 4 ; perform unrolling 4 times */ + ble 7, L(update1) + + ld r10, 0(r4) /* load doubleWord from src */ + cmpb r8, r10, r8 /* compare src with NULL ,we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne cr7, L(update3) + + std r10, 0(r3) /* copy doubleword at offset=0 */ + ld r10, 8(r4) /* load next doubleword from offset=8 */ + cmpb r8, r10, r8 /* compare src with NULL , we read just now */ + cmpdi cr7, r8, 0 /* if cmpb returned NULL ; we continue */ + bne 7,L(HopBy8) + + addi r8, r11, -4 + mr r7, r3 + srdi r8, r8, 2 + mr r6, r4 + addi r8, r8, 1 + li r12, 0 + mtctr r8 + b L(dwordCopy) + + .p2align 4 +L(dWordUnroll): + std r8, 16(r9) + ld r8, 24(r4) /* load dword,perform loop unrolling again */ + cmpb r10, r8, r10 + cmpdi cr7, r10, 0 + bne cr7, L(HopBy24) + + std r8, 24(r7) /* copy dword at offset=24 */ + addi r9, r9, 32 + addi r4, r4, 32 + bdz L(leftDwords) /* continue with loop on counter */ + + ld r3, 32(r6) + cmpb r8, r3, r10 + cmpdi cr7, r8, 0 + bne cr7, L(update2) + + std r3, 32(r7) + ld r10, 40(r6) + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(HopBy40) + + mr r6, r4 /* update values */ + mr r7, r9 + mr r11, r0 + mr r5, r19 + +L(dwordCopy): + std r10, 8(r9) /* copy dword at offset=8 */ + addi r19, r5, -32 + addi r0, r11, -4 + ld r8, 16(r4) + cmpb r10, r8, r12 + cmpdi cr7, r10, 0 + beq cr7, L(dWordUnroll) + + addi r9, r9, 16 /* increment dst by 16 */ + addi r4, r4, 16 /* increment src by 16 */ + addi r5, r5, -16 /* decrement length 'n' by 16 */ + addi r0, r11, -2 /* decrement loop counter */ + 
+L(dWordUnrollOFF): + ld r10, 0(r4) /* load first dword */ + li r8, 0 /* load mask */ + cmpb r8, r10, r8 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + mtctr r0 + li r7, 0 + b L(CopyDword) + + .p2align 4 +L(loadDWordandCompare): + ld r10, 0(r4) + cmpb r8, r10, r7 + cmpdi cr7, r8, 0 + bne cr7, L(byte_by_byte) + +L(CopyDword): + addi r9, r9, 8 + std r10, -8(r9) + addi r4, r4, 8 + addi r5, r5, -8 + bdnz L(loadDWordandCompare) + +L(byte_by_byte): + cmpldi cr7, r5, 3 + ble cr7, L(verifyByte) + srdi r10, r5, 2 + mr r19, r9 + mtctr r10 + b L(firstByteUnroll) + + .p2align 4 +L(bytes_unroll): + lbz r10, 1(r4) /* load byte from src */ + cmpdi cr7, r10, 0 /* compare for NULL */ + stb r10, 1(r19) /* store byte to dst */ + beq cr7, L(updtDestComputeN2ndByte) + + addi r4, r4, 4 /* advance src */ + + lbz r10, -2(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, r10, 0 + stb r10, 2(r19) + beq cr7, L(updtDestComputeN3rdByte) + + lbz r10, -1(r4) /* perform loop unrolling for byte r/w */ + addi r19, r19, 4 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + beq cr7, L(ComputeNByte) + + bdz L(update0) + +L(firstByteUnroll): + lbz r10, 0(r4) /* perform loop unrolling for byte r/w */ + cmpdi cr7, 10, 0 + stb r10, 0(r19) + bne cr7, L(bytes_unroll) + addi r19, r19, 1 + +L(ComputeNByte): + subf r9, r19, r9 /* compute 'n'n bytes to fill */ + add r8, r9, r5 + +L(zeroFill): + cmpdi cr7, r8, 0 /* compare if length is zero */ + beq cr7, L(update3return) + + mr r3, r19 /* fill buffer with */ + li r4, 0 /* zero fill buffer */ + mr r5, r8 /* how many bytes to fill buffer with */ + bl MEMSET /* call optimized memset */ + nop + +L(update3return): +#ifdef USE_AS_STPNCPY + addi r3, r19, -1 /* update return value */ +#endif + +L(hop2return): +#ifndef USE_AS_STPNCPY + mr r3, r18 /* set return value */ +#endif + addi r1, r1, FRAMESIZE /* restore stack pointer */ + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers 
save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + + .p2align 4 +L(update0): + mr r9, r19 + + .p2align 4 +L(verifyByte): + rldicl. r8, r5, 0, 62 +#ifdef USE_AS_STPNCPY + mr r3, r9 +#endif + beq cr0, L(hop2return) + mtctr r8 + addi r4, r4, -1 + mr r19, r9 + b L(oneBYone) + + .p2align 4 +L(proceed): + bdz L(done) + +L(oneBYone): + lbzu r10, 1(r4) /* copy byte */ + addi r19, r19, 1 + addi r8, r8, -1 + cmpdi cr7, r10, 0 + stb r10, -1(r19) + bne cr7, L(proceed) + b L(zeroFill) + + .p2align 4 +L(done): + addi r1, r1, FRAMESIZE /* restore stack pointer */ +#ifdef USE_AS_STPNCPY + mr r3, r19 /* set the return value */ +#else + mr r3, r18 /* set the return value */ +#endif + ld r0, 16(r1) /* read the saved link register */ + ld r18, -16(r1) /* restore callers save register, r18 */ + ld r19, -8(r1) /* restore callers save register, r19 */ + mtlr r0 /* branch to link register */ + blr /* return */ + +L(update1): + mr r0, r11 + mr r19, r5 + + .p2align 4 +L(leftDwords): + cmpdi cr7, r0, 0 + mr r5, r19 + bne cr7, L(dWordUnrollOFF) + b L(byte_by_byte) + + .p2align 4 +L(updtDestComputeN2ndByte): + addi r19, r19, 2 /* update dst by 2 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(updtDestComputeN3rdByte): + addi r19, r19, 3 /* update dst by 3 */ + subf r9, r19, r9 /* compute distance covered */ + add r8, r9, r5 + b L(zeroFill) + + .p2align 4 +L(HopBy24): + addi r9, r9, 24 /* increment dst by 24 */ + addi r4, r4, 24 /* increment src by 24 */ + addi r5, r5, -24 /* decrement length 'n' by 24 */ + addi r0, r11, -3 /* decrement loop counter */ + b L(dWordUnrollOFF) + + .p2align 4 +L(update2): + mr r5, r19 + b L(dWordUnrollOFF) + + .p2align 4 +L(HopBy40): + addi r9, r7, 40 /* increment dst by 40 */ + addi r4, r6, 40 /* increment src by 40 */ + addi r5, r5, -40 /* decrement length 'n' by 40 */ + addi r0, r11, -5 /* decrement loop counter */ + b L(dWordUnrollOFF) + +L(update3): + mr r0, r11 + b 
L(dWordUnrollOFF) + +L(HopBy8): + addi r9, r3, 8 /* increment dst by 8 */ + addi r4, r4, 8 /* increment src by 8 */ + addi r5, r5, -8 /* decrement length 'n' by 8 */ + addi r0, r11, -1 /* decrement loop counter */ + b L(dWordUnrollOFF) + +L(unaligned): + cmpdi r5, 16 /* Proceed byte by byte for less than 16 */ + ble L(byte_by_byte) + rldicl r7, r3, 0, 61 + rldicl r6, r4, 0, 61 + cmpdi r6, 0 /* Check src alignment */ + beq L(srcaligndstunalign) + /* src is unaligned */ + rlwinm r10, r4, 3,26,28 /* Calculate padding. */ + clrrdi r4, r4, 3 /* Align the addr to dw boundary */ + ld r8, 0(r4) /* Load doubleword from memory. */ + li r0, 0 + /* Discard bits not part of the string */ +#ifdef __LITTLE_ENDIAN__ + srd r7, r8, r10 +#else + sld r7, r8, r10 +#endif + cmpb r0, r7, r0 /* Compare each byte against null */ + /* Discard bits not part of the string */ +#ifdef __LITTLE_ENDIAN__ + sld r0, r0, r10 +#else + srd r0, r0, r10 +#endif + cmpdi r0, 0 + bne L(bytebybyte) /* if it has null, copy byte by byte */ + subfic r6, r6, 8 + rlwinm r12, r3, 3,26,28 /* Calculate padding in bits. */ + rldicl r9, r3, 0, 61 /* Calculate padding in bytes. */ + addi r3, r3, -1 + + cmpdi r12, 0 /* check dest alignment */ + beq L(srcunaligndstalign) + + /* both src and dst unaligned */ +#ifdef __LITTLE_ENDIAN__ + sld r8, r7, r10 + mr r11, r10 + addi r11, r11, -8 /* Adjust byte pointer on loaded dw */ +#else + srd r8, r7, r10 + subfic r11, r10, 64 +#endif + /* dst alignment is greater then src alignment? 
*/ + cmpd cr7, r12, r10 + ble cr7, L(dst_align_small) + /* src alignment is less than dst */ + + /* Calculate the dst alignment difference */ + subfic r7, r9, 8 + mtctr r7 + + /* Write until dst is aligned */ + cmpdi r0, r7, 4 + blt L(storebyte1) /* less than 4, store byte by byte */ + beq L(equal1) /* if its 4, store word */ + addi r0, r7, -4 /* greater than 4, so stb and stw */ + mtctr r0 +L(storebyte1): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte1) + + subfic r7, r9, 8 /* Check the remaining bytes */ + cmpdi r0, r7, 4 + blt L(proceed1) + + .align 4 +L(equal1): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on loaded dw */ + srd r7, r8, r11 +#else + subfic r11, r11, 64 + sld r7, r8, r11 + srdi r7, r7, 32 +#endif + stw r7, 1(r3) + addi r3, r3, 4 + addi r5, r5, -4 + +L(proceed1): + mr r7, r8 + /* calculate the Left over bytes to be written */ + subfic r11, r10, 64 + subfic r12, r12, 64 + subf r12, r12, r11 /* remaining bytes on second dw */ + subfic r10, r12, 64 /* remaining bytes on first dw */ + subfic r9, r9, 8 + subf r6, r9, r6 /* recalculate padding */ +L(srcunaligndstalign): + addi r3, r3, 1 + subfic r12, r10, 64 /* remaining bytes on second dw */ + addi r4, r4, 8 + li r0,0 + b L(storedouble) + + .align 4 +L(dst_align_small): + mtctr r6 + /* Write until src is aligned */ +L(storebyte2): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte2) + + addi r4, r4, 8 /* Increment src pointer */ + addi r3, r3, 1 /* Increment dst pointer */ + mr r9, r3 + li r8, 0 + cmpd cr7, r12, r10 + beq cr7, L(aligned) + rldicl r6, r3, 0, 61 /* Recalculate padding */ + mr r7, r6 + + /* src is algined */ +L(srcaligndstunalign): + mr r9, r3 + mr r6, r7 + ld r8, 0(r4) + subfic r10, r7, 8 + mr 
r7, r8 + li r0, 0 /* Check null */ + cmpb r0, r8, r0 + cmpdi r0, 0 + bne L(byte_by_byte) /* Do byte by byte if there is NULL */ + rlwinm r12, r3, 3,26,28 /* Calculate padding */ + addi r3, r3, -1 + /* write byte by byte until aligned */ +#ifdef __LITTLE_ENDIAN__ + li r11, -8 +#else + li r11, 64 +#endif + mtctr r10 + cmpdi r0, r10, 4 + blt L(storebyte) + beq L(equal) + addi r0, r10, -4 + mtctr r0 +L(storebyte): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 /* Adjust byte pointer on dw */ +#else + addi r11, r11, -8 +#endif + srd r7, r8, r11 + stbu r7, 1(r3) + addi r5, r5, -1 + bdnz L(storebyte) + + cmpdi r0, r10, 4 + blt L(align) + + .align 4 +L(equal): +#ifdef __LITTLE_ENDIAN__ + addi r11, r11, 8 + srd r7, r8, r11 +#else + subfic r11, r11, 64 + sld r7, r8, r11 + srdi r7, r7, 32 +#endif + stw r7, 1(r3) + addi r5, r5, -4 + addi r3, r3, 4 +L(align): + addi r3, r3, 1 + addi r4, r4, 8 /* Increment src pointer */ + subfic r10, r12, 64 + li r0, 0 + /* dst addr aligned to 8 */ +L(storedouble): + cmpdi r5, 8 + ble L(null1) + ld r7, 0(r4) /* load next dw */ + cmpb r0, r7, r0 + cmpdi r0, 0 /* check for null on each new dw */ + bne L(null) +#ifdef __LITTLE_ENDIAN__ + srd r9, r8, r10 /* bytes from first dw */ + sld r11, r7, r12 /* bytes from second dw */ +#else + sld r9, r8, r10 + srd r11, r7, r12 +#endif + or r11, r9, r11 /* make as a single dw */ + std r11, 0(r3) /* store as std on aligned addr */ + mr r8, r7 /* still few bytes left to be written */ + addi r3, r3, 8 /* increment dst addr */ + addi r4, r4, 8 /* increment src addr */ + addi r5, r5, -8 + b L(storedouble) /* Loop until NULL */ + + .align 4 + +/* We've hit the end of the string. Do the rest byte-by-byte. 
*/ +L(null): + addi r3, r3, -1 + mr r10, r12 + mtctr r6 +#ifdef __LITTLE_ENDIAN__ + subfic r10, r10, 64 + addi r10, r10, -8 +#endif + cmpdi r0, r5, 4 + blt L(loop) + cmpdi r0, r6, 4 + blt L(loop) + + /* we can still use stw if leftover >= 4 */ +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, 8 + srd r11, r8, r10 +#else + subfic r10, r10, 64 + sld r11, r8, r10 + srdi r11, r11, 32 +#endif + stw r11, 1(r3) + addi r5, r5, -4 + addi r3, r3, 4 + cmpdi r0, r5, 0 + beq L(g1) + cmpdi r0, r6, 4 + beq L(bytebybyte1) + addi r10, r10, 32 +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, -8 +#else + subfic r10, r10, 64 +#endif + addi r0, r6, -4 + mtctr r0 + /* remaining byte by byte part of first dw */ +L(loop): +#ifdef __LITTLE_ENDIAN__ + addi r10, r10, 8 +#else + addi r10, r10, -8 +#endif + srd r0, r8, r10 + stbu r0, 1(r3) + addi r5, r5, -1 + cmpdi r0, r5, 0 + beq L(g1) + bdnz L(loop) +L(bytebybyte1): + addi r3, r3, 1 + /* remaining byte by byte part of second dw */ +L(bytebybyte): + addi r3, r3, -8 + addi r4, r4, -1 + +#ifdef __LITTLE_ENDIAN__ + extrdi. r0, r7, 8, 56 + stbu r7, 8(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 48 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 40 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 32 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 24 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 16 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 8 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi r0, r7, 8, 0 + stbu r0, 1(r3) + addi r5, r5, -1 + b L(g2) +#else + extrdi. r0, r7, 8, 0 + stbu r0, 8(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 8 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. 
r0, r7, 8, 16 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 24 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 32 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 40 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + extrdi. r0, r7, 8, 48 + stbu r0, 1(r3) + addi r5, r5, -1 + beq L(g2) + cmpdi r5, 0 + beq L(g1) + stbu r7, 1(r3) + addi r5, r5, -1 + b L(g2) +#endif +L(g1): +#ifdef USE_AS_STPNCPY + addi r3, r3, 1 +#endif +L(g2): + addi r3, r3, 1 + mr r19, r3 + mr r8, r5 + b L(zeroFill) +L(null1): + mr r9, r3 + subf r4, r6, r4 + b L(byte_by_byte) +END(FUNC_NAME) +#ifndef USE_AS_STPNCPY +libc_hidden_builtin_def (strncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S new file mode 100644 index 0000000000..a970b6ce30 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strnlen.S @@ -0,0 +1,182 @@ +/* Optimized strnlen implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by Luis Machado <luisgpm@br.ibm.com>. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#ifndef STRNLEN +# define STRNLEN __strnlen +#endif + +/* int [r3] strnlen (char *s [r3], int size [r4]) */ + .machine power7 +ENTRY (STRNLEN) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 + add r7,r3,r4 /* Calculate the last acceptable address. */ + cmpldi r4,32 + li r0,0 /* Doubleword with null chars. */ + addi r7,r7,-1 + + /* If we have less than 33 bytes to search, skip to a faster code. */ + ble L(small_range) + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif + cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + clrrdi r7,r7,3 /* Address of last doubleword. */ + mtcrf 0x01,r8 + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_setup) + + /* Handle DWORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr7,r10,0 + bne cr7,L(done) + +L(loop_setup): + /* The last dword we want to read in the loop below is the one + containing the last byte of the string, ie. the dword at + (s + size - 1) & ~7, or r7. The first dword read is at + r8 + 8, we read 2 * cnt dwords, so the last dword read will + be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives + cnt = (r7 - r8) / 16 */ + sub r5,r7,r8 + srdi r6,r5,4 /* Number of loop iterations. */ + mtctr r6 /* Setup the counter. */ + + /* Main loop to look for the null byte in the string. Since + it's a small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12,8(r8) + ldu r11,16(r8) + cmpb r10,r12,r0 + cmpb r9,r11,r0 + or r5,r9,r10 /* Merge everything in one doubleword. 
*/ + cmpldi cr7,r5,0 + bne cr7,L(found) + bdnz L(loop) + + /* We may have one more dword to read. */ + cmpld cr6,r8,r7 + beq cr6,L(end_max) + + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr6,r10,0 + bne cr6,L(done) + +L(end_max): + mr r3,r4 + blr + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + .align 4 +L(found): + cmpldi cr6,r10,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r9 + addi r8,r8,8 + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. + We need to make sure the null char is *before* the end of the + range. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r0,r10,-1 + andc r0,r0,r10 + popcntd r0,r0 +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmpld r3,r4 + blelr + mr r3,r4 + blr + +/* Deals with size <= 32. */ + .align 4 +L(small_range): + cmpldi r4,0 + beq L(end_max) + + clrrdi r7,r7,3 /* Address of last doubleword. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. 
*/ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif + cmpldi cr7,r10,0 + bne cr7,L(done) + + cmpld r8,r7 + beq L(end_max) + + .p2align 5 +L(loop_small): + ldu r12,8(r8) + cmpb r10,r12,r0 + cmpldi cr6,r10,0 + bne cr6,L(done) + cmpld r8,r7 + bne L(loop_small) + mr r3,r4 + blr + +END (STRNLEN) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S new file mode 100644 index 0000000000..c22393deb5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strrchr.S @@ -0,0 +1,260 @@ +/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* int [r3] strrchr (char *s [r3], int c [r4]) */ + +#ifndef STRRCHR +# define STRRCHR strrchr +#endif + + .machine power7 +ENTRY (STRRCHR) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r9,0 /* used to store last occurence */ + li r0,0 /* Doubleword with null chars to use + with cmpb. 
*/ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* r4 is changed now ,if its passed as more chars + check for null again */ + cmpdi cr7,r4,0 + beq cr7,L(null_match) + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + +L(align): + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r7,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r7,r4 + cmpb r7,r7,r0 + or r12,r10,r11 + or r5,r6,r7 + or r5,r12,r5 + cmpdi cr7,r5,0 + beq cr7,L(loop) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. 
*/ + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + +L(done): + /* if there are more than one 0xff in r11, find the first pos of ff + in r11 and fill r10 with 0 from that position */ + cmpdi cr7,r11,0 + beq cr7,L(no_null) +#ifdef __LITTLE_ENDIAN__ + addi r3,r11,-1 + andc r3,r3,r11 + popcntd r0,r3 +#else + cntlzd r0,r11 +#endif + subfic r0,r0,63 + li r6,-1 +#ifdef __LITTLE_ENDIAN__ + srd r0,r6,r0 +#else + sld r0,r6,r0 +#endif + and r10,r0,r10 +L(no_null): +#ifdef __LITTLE_ENDIAN__ + cntlzd r0,r10 /* Count leading zeros before c matches. */ + addi r3,r10,-1 + andc r3,r3,r10 + addi r10,r11,-1 + andc r10,r10,r11 + cmpld cr7,r3,r10 + bgt cr7,L(no_match) +#else + addi r3,r10,-1 /* Count trailing zeros before c matches. */ + andc r3,r3,r10 + popcntd r0,r3 + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + subfic r0,r0,7 + add r9,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + li r0,0 + cmpdi cr7,r11,0 /* If r11 == 0, no null's have been found. */ + beq cr7,L(align) + + .align 4 +L(no_match): + mr r3,r9 + blr + +/* We are here because strrchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. 
*/ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(loop_null) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. 
*/ + blr +END (STRRCHR) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c new file mode 100644 index 0000000000..a917b2157e --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr-ppc64.c @@ -0,0 +1,27 @@ +/* Optimized strstr implementation for PowerPC64/POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRSTR __strstr_ppc +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(__name) + +extern __typeof (strstr) __strstr_ppc attribute_hidden; + +#include <string/strstr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S new file mode 100644 index 0000000000..260db2ed6d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/strstr.S @@ -0,0 +1,521 @@ +/* Optimized strstr implementation for PowerPC64/POWER7. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Char * [r3] strstr (char *s [r3], char * pat[r4]) */ + +/* The performance gain is obtained using aligned memory access, load + * doubleword and usage of cmpb instruction for quicker comparison. */ + +#define ITERATIONS 64 + +#ifndef STRSTR +# define STRSTR strstr +#endif + +#ifndef STRLEN +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRLEN __GI_strlen +# else +# define STRLEN strlen +# endif +#endif + +#ifndef STRNLEN +/* For builds with no IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRNLEN __GI_strnlen +# else +# define STRNLEN __strnlen +# endif +#endif + +#ifndef STRCHR +# ifdef SHARED +# define STRCHR __GI_strchr +# else +# define STRCHR strchr +# endif +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+32) + .machine power7 +EALIGN (STRSTR, 4, 0) + CALL_MCOUNT 2 + mflr r0 /* Load link register LR to r0. */ + std r31, -8(r1) /* Save callers register r31. */ + std r30, -16(r1) /* Save callers register r30. */ + std r29, -24(r1) /* Save callers register r29. */ + std r28, -32(r1) /* Save callers register r28. */ + std r0, 16(r1) /* Store the link register. 
*/ + cfi_offset(r31, -8) + cfi_offset(r30, -16) + cfi_offset(r28, -32) + cfi_offset(r29, -24) + cfi_offset(lr, 16) + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ + cfi_adjust_cfa_offset(FRAMESIZE) + + dcbt 0, r3 + dcbt 0, r4 + cmpdi cr7, r3, 0 + beq cr7, L(retnull) + cmpdi cr7, r4, 0 + beq cr7, L(retnull) + + mr r29, r3 + mr r30, r4 + mr r3, r4 + bl STRLEN + nop + + cmpdi cr7, r3, 0 /* If search str is null. */ + beq cr7, L(ret_r3) + + mr r31, r3 + mr r4, r3 + mr r3, r29 + bl STRNLEN + nop + + cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ + blt cr7, L(retnull) + mr r3, r29 + lbz r4, 0(r30) + bl STRCHR + nop + + mr r11, r3 + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + /* Reg r28 is used to count the number of iterations. */ + li r28, 0 + rldicl r8, r3, 0, 52 /* Page cross check. */ + cmpldi cr7, r8, 4096-16 + bgt cr7, L(bytebybyte) + + rldicl r8, r30, 0, 52 + cmpldi cr7, r8, 4096-16 + bgt cr7, L(bytebybyte) + + /* If len(r4) < 8 handle in a different way. */ + /* Shift position based on null and use cmpb. */ + cmpdi cr7, r31, 8 + blt cr7, L(lessthan8) + + /* Len(r4) >= 8 reaches here. */ + mr r8, r3 /* Save r3 for future use. */ + mr r4, r30 /* Restore r4. */ + li r0, 0 + rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ + clrrdi r4, r4, 3 /* Make r4 aligned to 8. */ + ld r6, 0(r4) + addi r4, r4, 8 + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(begin1) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ +#else + sld r6, r6, r10 +#endif + ld r9, 0(r4) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r9, r9, r10 /* Discard unwanted bits. */ +#else + srd r9, r9, r10 +#endif + or r6, r6, r9 /* Form complete search str. */ +L(begin1): + mr r29, r6 + rlwinm r10, r3, 3, 26, 28 + clrrdi r3, r3, 3 + ld r5, 0(r3) + cmpb r9, r0, r6 /* Check if input has null. */ + cmpdi cr7, r9, 0 + bne cr7, L(return3) + cmpb r9, r0, r5 /* Check if input has null. 
*/ +#ifdef __LITTLE_ENDIAN__ + srd r9, r9, r10 +#else + sld r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + + li r12, -8 /* Shift values. */ + li r11, 72 /* Shift values. */ + cmpdi cr7, r10, 0 + beq cr7, L(nextbyte1) + mr r12, r10 + addi r12, r12, -8 + subfic r11, r12, 64 + +L(nextbyte1): + ldu r7, 8(r3) /* Load next dw. */ + addi r12, r12, 8 /* Shift one byte and compare. */ + addi r11, r11, -8 +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 /* Rotate based on mask. */ + sld r10, r7, r11 +#else + sld r9, r5, r12 + srd r10, r7, r11 +#endif + /* Form single dw from few bytes on first load and second load. */ + or r10, r9, r10 + /* Check for null in the formed dw. */ + cmpb r9, r0, r10 + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + /* Cmpb search str and input str. */ + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + beq cr7, L(match) + addi r8, r8, 1 + b L(begin) + + .align 4 +L(match): + /* There is a match of 8 bytes, check next bytes. */ + cmpdi cr7, r31, 8 + beq cr7, L(return) + /* Update next starting point r8. */ + srdi r9, r11, 3 + subf r9, r9, r3 + mr r8, r9 + +L(secondmatch): + mr r5, r7 + rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */ + ld r6, 0(r4) + addi r4, r4, 8 + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(proceed3) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ + cmpb r9, r0, r6 + sld r9, r9, r10 +#else + sld r6, r6, r10 + cmpb r9, r0, r6 + srd r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(proceed3) + ld r9, 0(r4) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r9, r9, r10 /* Discard unwanted bits. */ +#else + srd r9, r9, r10 +#endif + or r6, r6, r9 /* Form complete search str. 
*/ + +L(proceed3): + li r7, 0 + addi r3, r3, 8 + cmpb r9, r0, r5 + cmpdi cr7, r9, 0 + bne cr7, L(proceed4) + ld r7, 0(r3) +L(proceed4): +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 + sld r10, r7, r11 +#else + sld r9, r5, r12 + srd r10, r7, r11 +#endif + /* Form single dw with few bytes from first and second load. */ + or r10, r9, r10 + cmpb r9, r0, r6 + cmpdi cr7, r9, 0 + bne cr7, L(return4) + /* Check for null in the formed dw. */ + cmpb r9, r0, r10 + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + /* If the next 8 bytes dont match, start search again. */ + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + bne cr7, L(reset) + /* If the next 8 bytes match, load and compare next 8. */ + b L(secondmatch) + + .align 4 +L(reset): + /* Start the search again. */ + addi r8, r8, 1 + b L(begin) + + .align 4 +L(return3): + /* Count leading zeros and compare partial dw. */ +#ifdef __LITTLE_ENDIAN__ + addi r7, r9, -1 + andc r7, r7, r9 + popcntd r7, r7 + subfic r7, r7, 64 + sld r10, r5, r7 + sld r6, r6, r7 +#else + cntlzd r7, r9 + subfic r7, r7, 64 + srd r10, r5, r7 + srd r6, r6, r7 +#endif + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + addi r8, r8, 1 + /* Start search again if there is no match. */ + bne cr7, L(begin) + /* If the words match, update return values. */ + subfic r7, r7, 64 + srdi r7, r7, 3 + add r3, r3, r7 + subf r3, r31, r3 + b L(end) + + .align 4 +L(return4): + /* Count leading zeros and compare partial dw. */ +#ifdef __LITTLE_ENDIAN__ + addi r7, r9, -1 + andc r7, r7, r9 + popcntd r7, r7 + subfic r7, r7, 64 + sld r10, r10, r7 + sld r6, r6, r7 +#else + cntlzd r7, r9 + subfic r7, r7, 64 + srd r10, r10, r7 + srd r6, r6, r7 +#endif + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + addi r8, r8, 1 + bne cr7, L(begin) + subfic r7, r7, 64 + srdi r11, r11, 3 + subf r3, r11, r3 + srdi r7, r7, 3 + add r3, r3, r7 + subf r3, r31, r3 + b L(end) + + .align 4 +L(begin): + mr r3, r8 + /* When our iterations exceed ITERATIONS,fall back to default. 
*/ + addi r28, r28, 1 + cmpdi cr7, r28, ITERATIONS + beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + mr r8, r3 + mr r4, r30 /* Restore r4. */ + li r0, 0 + mr r6, r29 + clrrdi r4, r4, 3 + addi r4, r4, 8 + b L(begin1) + + /* Handle less than 8 search string. */ + .align 4 +L(lessthan8): + mr r4, r3 + mr r9, r30 + li r0, 0 + + rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits. */ + srdi r8, r10, 3 /* Padding in bytes. */ + clrrdi r9, r9, 3 /* Make r4 aligned to 8. */ + ld r6, 0(r9) + cmpdi cr7, r10, 0 /* Check if its already aligned? */ + beq cr7, L(proceed2) +#ifdef __LITTLE_ENDIAN__ + srd r6, r6, r10 /* Discard unwanted bits. */ +#else + sld r6, r6, r10 +#endif + subfic r8, r8, 8 + cmpd cr7, r8, r31 /* Next load needed? */ + bge cr7, L(proceed2) + ld r7, 8(r9) + subfic r10, r10, 64 +#ifdef __LITTLE_ENDIAN__ + sld r7, r7, r10 /* Discard unwanted bits. */ +#else + srd r7, r7, r10 +#endif + or r6, r6, r7 /* Form complete search str. */ +L(proceed2): + mr r29, r6 + rlwinm r10, r3, 3, 26, 28 + clrrdi r7, r3, 3 /* Make r3 aligned. */ + ld r5, 0(r7) + sldi r8, r31, 3 + subfic r8, r8, 64 +#ifdef __LITTLE_ENDIAN__ + sld r6, r6, r8 + cmpb r9, r0, r5 + srd r9, r9, r10 +#else + srd r6, r6, r8 + cmpb r9, r0, r5 + sld r9, r9, r10 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(noload) + cmpdi cr7, r10, 0 + beq cr7, L(continue) + ld r7, 8(r7) +L(continue1): + mr r12, r10 + addi r12, r12, -8 + subfic r11, r12, 64 + b L(nextbyte) + + .align 4 +L(continue): + ld r7, 8(r7) + li r12, -8 /* Shift values. */ + li r11, 72 /* Shift values. */ +L(nextbyte): + addi r12, r12, 8 /* Mask for rotation. 
*/ + addi r11, r11, -8 +#ifdef __LITTLE_ENDIAN__ + srd r9, r5, r12 + sld r10, r7, r11 + or r10, r9, r10 + sld r10, r10, r8 + cmpb r9, r0, r10 + srd r9, r9, r8 +#else + sld r9, r5, r12 + srd r10, r7, r11 + or r10, r9, r10 + srd r10, r10, r8 + cmpb r9, r0, r10 + sld r9, r9, r8 +#endif + cmpdi cr7, r9, 0 + bne cr7, L(retnull) + cmpb r9, r10, r6 + cmpdi cr7, r9, -1 + beq cr7, L(end) + addi r3, r4, 1 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r28, r28, 1 + cmpdi cr7, r28, ITERATIONS + beq cr7, L(default) + lbz r4, 0(r30) + bl STRCHR + nop + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + mr r4, r3 + mr r6, r29 + li r0, 0 + b L(proceed2) + + .align 4 +L(noload): + /* Reached null in r3, so skip next load. */ + li r7, 0 + b L(continue1) + + .align 4 +L(return): + /* Update return values. */ + srdi r9, r11, 3 + subf r3, r9, r3 + b L(end) + + /* Handling byte by byte. */ + .align 4 +L(bytebybyte): + mr r8, r3 + addi r8, r8, -1 +L(loop1): + addi r8, r8, 1 + mr r3, r8 + mr r4, r30 + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(updater3) +L(loop): + lbz r5, 0(r3) + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + cmpld cr7, r6, r5 + bne cr7, L(loop1) + addi r3, r3, 1 + addi r4, r4, 1 + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(updater3) + b L(loop) + + /* Handling return values. */ + .align 4 +L(updater3): + subf r3, r31, r3 /* Reduce len of r4 from r3. */ + b L(end) + + .align 4 +L(ret_r3): + mr r3, r29 /* Return r3. */ + b L(end) + + .align 4 +L(retnull): + li r3, 0 /* Return NULL. */ + b L(end) + + .align 4 +L(default): + mr r4, r30 + bl __strstr_ppc + nop + + .align 4 +L(end): + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + cfi_adjust_cfa_offset(-FRAMESIZE) + ld r0, 16(r1) /* Restore the saved link register. */ + ld r28, -32(r1) /* Restore callers save register r28. */ + ld r29, -24(r1) /* Restore callers save register r29. */ + ld r30, -16(r1) /* Restore callers save register r30. 
*/ + ld r31, -8(r1) /* Restore callers save register r31. */ + mtlr r0 /* Branch to link register. */ + blr +END (STRSTR) +libc_hidden_builtin_def (strstr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S new file mode 100644 index 0000000000..848dad5718 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power7/sub_n.S @@ -0,0 +1,23 @@ +/* PowerPC64 mpn_lshift -- mpn_add_n/mpn_sub_n -- mpn addition and + subtraction. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define USE_AS_SUB +#include "add_n.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies new file mode 100644 index 0000000000..9a5e3c7277 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power7/fpu +powerpc/powerpc64/power7 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile new file mode 100644 index 0000000000..71a59529f3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/Makefile @@ -0,0 +1,3 @@ +ifeq ($(subdir),string) +sysdep_routines += strcasestr-ppc64 +endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies new file mode 100644 index 0000000000..1187cdfb0a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S new file mode 100644 index 0000000000..4c42926a74 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/e_expf.S @@ -0,0 +1,303 @@ +/* Optimized expf(). PowerPC64/POWER8 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* Short algorithm description: + * + * Let K = 64 (table size). + * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) + * where: + * x = m*log(2)/K + y, y in [0.0..log(2)/K] + * m = n*K + j, m,n,j - signed integer, j in [0..K-1] + * values of 2^(j/K) are tabulated as T[j]. + * + * P(y) is a minimax polynomial approximation of expf(y)-1 + * on small interval [0.0..log(2)/K]. + * + * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as + * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y + * + * Special cases: + * expf(NaN) = NaN + * expf(+INF) = +INF + * expf(-INF) = 0 + * expf(x) = 1 for subnormals + * for finite argument, only expf(0)=1 is exact + * expf(x) overflows if x>88.7228317260742190 + * expf(x) underflows if x<-103.972076416015620 + */ + +#define C1 0x42ad496b /* Single precision 125*log(2). */ +#define C2 0x31800000 /* Single precision 2^(-28). */ +#define SP_INF 0x7f800000 /* Single precision Inf. */ +#define SP_EXP_BIAS 0x1fc0 /* Single precision exponent bias. */ + +#define DATA_OFFSET r9 + +/* Implements the function + + float [fp1] expf (float [fp1] x) */ + + .machine power8 +EALIGN(__ieee754_expf, 4, 0) + addis DATA_OFFSET,r2,.Lanchor@toc@ha + addi DATA_OFFSET,DATA_OFFSET,.Lanchor@toc@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 /* r8 = x */ + lfd fp2,(.KLN2-.Lanchor)(DATA_OFFSET) + lfd fp3,(.P2-.Lanchor)(DATA_OFFSET) + rldicl r3,r8,32,33 /* r3 = |x| */ + lis r4,C1@ha /* r4 = 125*log(2) */ + ori r4,r4,C1@l + cmpw r3,r4 + lfd fp5,(.P3-.Lanchor)(DATA_OFFSET) + lfd fp4,(.RS-.Lanchor)(DATA_OFFSET) + fmadd fp2,fp1,fp2,fp4 /* fp2 = x * K/log(2) + (2^23 + 2^22) */ + bge L(special_paths) /* |x| >= 125*log(2) ? */ + + lis r4,C2@ha + ori r4,r4,C2@l + cmpw r3,r4 + blt L(small_args) /* |x| < 2^(-28) ? 
*/ + + /* Main path: here if 2^(-28) <= |x| < 125*log(2) */ + frsp fp6,fp2 + xscvdpsp v2,v2 + mfvsrd r8,v2 + mr r3,r8 /* r3 = m */ + rldicl r8,r8,32,58 /* r8 = j */ + lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET) + fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */ + srdi r3,r3,32 + clrrwi r3,r3,6 /* r3 = n */ + lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET) + fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */ + fmul fp2,fp0,fp0 /* fp2 = z = y^2 */ + lfd fp4,(.P1-.Lanchor)(DATA_OFFSET) + lfd fp6,(.P0-.Lanchor)(DATA_OFFSET) + lis r4,SP_EXP_BIAS@ha + ori r4,r4,SP_EXP_BIAS@l + add r3,r3,r4 + rldic r3,r3,49,1 /* r3 = 2^n */ + fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */ + fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */ + mtvsrd v1,r3 + xscvspdp v1,v1 + fmul fp4,fp4,fp2 /* fp4 = (P3 * z + P1)*z */ + fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */ + sldi r8,r8,3 /* Access doublewords from T[j]. */ + addi r6,DATA_OFFSET,(.Ttable-.Lanchor) + lfdx fp3,r6,r8 + fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + P(y)) */ + fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + P(y)) */ + frsp fp1,fp1 + blr + + .align 4 +/* x is either underflow, overflow, infinite or NaN. */ +L(special_paths): + srdi r8,r8,32 + rlwinm r8,r8,3,29,29 /* r8 = 0, if x positive. + r8 = 4, otherwise. */ + addi r6,DATA_OFFSET,(.SPRANGE-.Lanchor) + lwzx r4,r6,r8 /* r4 = .SPRANGE[signbit(x)] */ + cmpw r3,r4 + /* |x| <= .SPRANGE[signbit(x)] */ + ble L(near_under_or_overflow) + + lis r4,SP_INF@ha + ori r4,r4,SP_INF@l + cmpw r3,r4 + bge L(arg_inf_or_nan) /* |x| > Infinite ? 
*/ + + addi r6,DATA_OFFSET,(.SPLARGE_SMALL-.Lanchor) + lfsx fp1,r6,r8 + fmuls fp1,fp1,fp1 + blr + + + .align 4 +L(small_args): + /* expf(x) = 1.0, where |x| < |2^(-28)| */ + lfs fp2,(.SPone-.Lanchor)(DATA_OFFSET) + fadds fp1,fp1,fp2 + blr + + + .align 4 +L(arg_inf_or_nan:) + bne L(arg_nan) + + /* expf(+INF) = +INF + expf(-INF) = 0 */ + addi r6,DATA_OFFSET,(.INF_ZERO-.Lanchor) + lfsx fp1,r6,r8 + blr + + + .align 4 +L(arg_nan): + /* expf(NaN) = NaN */ + fadd fp1,fp1,fp1 + frsp fp1,fp1 + blr + + .align 4 +L(near_under_or_overflow): + frsp fp6,fp2 + xscvdpsp v2,v2 + mfvsrd r8,v2 + mr r3,r8 /* r3 = m */ + rldicl r8,r8,32,58 /* r8 = j */ + lfs fp4,(.SP_RS-.Lanchor)(DATA_OFFSET) + fsubs fp2,fp6,fp4 /* fp2 = m = x * K/log(2) */ + srdi r3,r3,32 + clrrwi r3,r3,6 /* r3 = n */ + lfd fp6,(.NLN2K-.Lanchor)(DATA_OFFSET) + fmadd fp0,fp2,fp6,fp1 /* fp0 = y = x - m*log(2)/K */ + fmul fp2,fp0,fp0 /* fp2 = z = y^2 */ + lfd fp4,(.P1-.Lanchor)(DATA_OFFSET) + lfd fp6,(.P0-.Lanchor)(DATA_OFFSET) + ld r4,(.DP_EXP_BIAS-.Lanchor)(DATA_OFFSET) + add r3,r3,r4 + rldic r3,r3,46,1 /* r3 = 2 */ + fmadd fp4,fp5,fp2,fp4 /* fp4 = P3 * z + P1 */ + fmadd fp6,fp3,fp2,fp6 /* fp6 = P2 * z + P0 */ + mtvsrd v1,r3 + fmul fp4,fp4,fp2 /* fp4 = (P3*z + P1)*z */ + fmadd fp0,fp0,fp6,fp4 /* fp0 = P(y) */ + sldi r8,r8,3 /* Access doublewords from T[j]. */ + addi r6,DATA_OFFSET,(.Ttable-.Lanchor) + lfdx fp3,r6,r8 + fmadd fp0,fp0,fp3,fp3 /* fp0 = T[j] * (1 + T[j]) */ + fmul fp1,fp1,fp0 /* fp1 = 2^n * T[j] * (1 + T[j]) */ + frsp fp1,fp1 + blr +END(__ieee754_expf) + + .section .rodata, "a",@progbits +.Lanchor: + .balign 8 +/* Table T[j] = 2^(j/K). Double precision. 
*/ +.Ttable: + .8byte 0x3ff0000000000000 + .8byte 0x3ff02c9a3e778061 + .8byte 0x3ff059b0d3158574 + .8byte 0x3ff0874518759bc8 + .8byte 0x3ff0b5586cf9890f + .8byte 0x3ff0e3ec32d3d1a2 + .8byte 0x3ff11301d0125b51 + .8byte 0x3ff1429aaea92de0 + .8byte 0x3ff172b83c7d517b + .8byte 0x3ff1a35beb6fcb75 + .8byte 0x3ff1d4873168b9aa + .8byte 0x3ff2063b88628cd6 + .8byte 0x3ff2387a6e756238 + .8byte 0x3ff26b4565e27cdd + .8byte 0x3ff29e9df51fdee1 + .8byte 0x3ff2d285a6e4030b + .8byte 0x3ff306fe0a31b715 + .8byte 0x3ff33c08b26416ff + .8byte 0x3ff371a7373aa9cb + .8byte 0x3ff3a7db34e59ff7 + .8byte 0x3ff3dea64c123422 + .8byte 0x3ff4160a21f72e2a + .8byte 0x3ff44e086061892d + .8byte 0x3ff486a2b5c13cd0 + .8byte 0x3ff4bfdad5362a27 + .8byte 0x3ff4f9b2769d2ca7 + .8byte 0x3ff5342b569d4f82 + .8byte 0x3ff56f4736b527da + .8byte 0x3ff5ab07dd485429 + .8byte 0x3ff5e76f15ad2148 + .8byte 0x3ff6247eb03a5585 + .8byte 0x3ff6623882552225 + .8byte 0x3ff6a09e667f3bcd + .8byte 0x3ff6dfb23c651a2f + .8byte 0x3ff71f75e8ec5f74 + .8byte 0x3ff75feb564267c9 + .8byte 0x3ff7a11473eb0187 + .8byte 0x3ff7e2f336cf4e62 + .8byte 0x3ff82589994cce13 + .8byte 0x3ff868d99b4492ed + .8byte 0x3ff8ace5422aa0db + .8byte 0x3ff8f1ae99157736 + .8byte 0x3ff93737b0cdc5e5 + .8byte 0x3ff97d829fde4e50 + .8byte 0x3ff9c49182a3f090 + .8byte 0x3ffa0c667b5de565 + .8byte 0x3ffa5503b23e255d + .8byte 0x3ffa9e6b5579fdbf + .8byte 0x3ffae89f995ad3ad + .8byte 0x3ffb33a2b84f15fb + .8byte 0x3ffb7f76f2fb5e47 + .8byte 0x3ffbcc1e904bc1d2 + .8byte 0x3ffc199bdd85529c + .8byte 0x3ffc67f12e57d14b + .8byte 0x3ffcb720dcef9069 + .8byte 0x3ffd072d4a07897c + .8byte 0x3ffd5818dcfba487 + .8byte 0x3ffda9e603db3285 + .8byte 0x3ffdfc97337b9b5f + .8byte 0x3ffe502ee78b3ff6 + .8byte 0x3ffea4afa2a490da + .8byte 0x3ffefa1bee615a27 + .8byte 0x3fff50765b6e4540 + .8byte 0x3fffa7c1819e90d8 + +.KLN2: + .8byte 0x40571547652b82fe /* Double precision K/log(2). */ + +/* Double precision polynomial coefficients. 
*/ +.P0: + .8byte 0x3fefffffffffe7c6 +.P1: + .8byte 0x3fe00000008d6118 +.P2: + .8byte 0x3fc55550da752d4f +.P3: + .8byte 0x3fa56420eb78fa85 + +.RS: + .8byte 0x4168000000000000 /* Double precision 2^23 + 2^22. */ +.NLN2K: + .8byte 0xbf862e42fefa39ef /* Double precision -log(2)/K. */ +.DP_EXP_BIAS: + .8byte 0x000000000000ffc0 /* Double precision exponent bias. */ + + .balign 4 +.SPone: + .4byte 0x3f800000 /* Single precision 1.0. */ +.SP_RS: + .4byte 0x4b400000 /* Single precision 2^23 + 2^22. */ + +.SPRANGE: /* Single precision overflow/underflow bounds. */ + .4byte 0x42b17217 /* if x>this bound, then result overflows. */ + .4byte 0x42cff1b4 /* if x<this bound, then result underflows. */ + +.SPLARGE_SMALL: + .4byte 0x71800000 /* 2^100. */ + .4byte 0x0d800000 /* 2^-100. */ + +.INF_ZERO: + .4byte 0x7f800000 /* Single precision Inf. */ + .4byte 0 /* Single precision zero. */ + +strong_alias (__ieee754_expf, __expf_finite) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies new file mode 100644 index 0000000000..7fd86fdf87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S new file mode 100644 index 0000000000..8dfa0076e0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_cosf.S @@ -0,0 +1,508 @@ +/* Optimized cosf(). PowerPC64/POWER8 version. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#define _ERRNO_H 1 +#include <bits/errno.h> + +#define FRAMESIZE (FRAME_MIN_SIZE+16) + +#define FLOAT_EXPONENT_SHIFT 23 +#define FLOAT_EXPONENT_BIAS 127 +#define INTEGER_BITS 3 + +#define PI_4 0x3f490fdb /* PI/4 */ +#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */ +#define TWO_PN5 0x3d000000 /* 2^-5 */ +#define TWO_PN27 0x32000000 /* 2^-27 */ +#define INFINITY 0x7f800000 +#define TWO_P23 0x4b000000 /* 2^23 */ +#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */ + + /* Implements the function + + float [fp1] cosf (float [fp1] x) */ + + .machine power8 +EALIGN(__cosf, 4, 0) + addis r9,r2,L(anchor)@toc@ha + addi r9,r9,L(anchor)@toc@l + + lis r4,PI_4@h + ori r4,r4,PI_4@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 + rldicl r3,r8,32,33 /* Remove sign bit. */ + + cmpw r3,r4 + bge L(greater_or_equal_pio4) + + lis r4,TWO_PN5@h + ori r4,r4,TWO_PN5@l + + cmpw r3,r4 + blt L(less_2pn5) + + /* Chebyshev polynomial of the form: + * 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). */ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp1,fp2,fp4,fp3 /* 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + frsp fp1,fp1 /* Round to single precision. 
*/ + + blr + + .balign 16 +L(greater_or_equal_pio4): + lis r4,NINEPI_4@h + ori r4,r4,NINEPI_4@l + cmpw r3,r4 + bge L(greater_or_equal_9pio4) + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + fabs fp1,fp1 /* |x| */ + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + fctiduz fp2,fp2 + mfvsrd r3,v2 /* n = |x| mod PI/4 */ + + /* Now use that quotient to find |x| mod (PI/2). */ + addi r7,r3,1 + rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */ + addi r6,r9,(L(pio2_table)-L(anchor)) + lfdx fp4,r5,r6 + fsub fp1,fp1,fp4 + + .balign 16 +L(reduced): + /* Now we are in the range -PI/4 to PI/4. */ + + /* Work out if we are in a positive or negative primary interval. */ + addi r7,r7,2 + rldicl r4,r7,62,63 /* ((n+3) >> 2) & 1 */ + + /* Load a 1.0 or -1.0. */ + addi r5,r9,(L(ones)-L(anchor)) + sldi r4,r4,3 + lfdx fp0,r4,r5 + + /* Are we in the primary interval of sin or cos? */ + andi. r4,r7,0x2 + bne L(cos) + + /* Chebyshev polynomial of the form: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(cos): + /* Chebyshev polynomial of the form: + 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). 
*/ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_9pio4): + lis r4,INFINITY@h + ori r4,r4,INFINITY@l + cmpw r3,r4 + bge L(inf_or_nan) + + lis r4,TWO_P23@h + ori r4,r4,TWO_P23@l + cmpw r3,r4 + bge L(greater_or_equal_2p23) + + fabs fp1,fp1 /* |x| */ + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + + lfd fp3,(L(DPone)-L(anchor))(r9) + lfd fp4,(L(DPhalf)-L(anchor))(r9) + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + friz fp2,fp2 /* n = floor(|x|/(PI/4)) */ + + /* Calculate (n + 1) / 2. */ + fadd fp2,fp2,fp3 /* n + 1 */ + fmul fp3,fp2,fp4 /* (n + 1) / 2 */ + friz fp3,fp3 + + lfd fp4,(L(pio2hi)-L(anchor))(r9) + lfd fp5,(L(pio2lo)-L(anchor))(r9) + + fmul fp6,fp4,fp3 + fadd fp6,fp6,fp1 + fmadd fp1,fp5,fp3,fp6 + + fctiduz fp2,fp2 + mfvsrd r7,v2 /* n + 1 */ + + b L(reduced) + + .balign 16 +L(inf_or_nan): + bne L(skip_errno_setting) /* Is a NAN? */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + stfd fp1,-8(r1) + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl JUMPTARGET(__errno_location) + nop + + /* Restore the stack frame. 
*/ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + + lfd fp1,-8(r1) + + /* errno = EDOM */ + li r4,EDOM + stw r4,0(r3) + +L(skip_errno_setting): + fsub fp1,fp1,fp1 /* x - x */ + blr + + .balign 16 +L(greater_or_equal_2p23): + fabs fp1,fp1 + + srwi r4,r3,FLOAT_EXPONENT_SHIFT + subi r4,r4,FLOAT_EXPONENT_BIAS + + /* We reduce the input modulo pi/4, so we need 3 bits of integer + to determine where in 2*pi we are. Index into our array + accordingly. */ + addi r4,r4,INTEGER_BITS + + /* To avoid an expensive divide, for the range we care about (0 - 127) + we can transform x/28 into: + + x/28 = (x * ((0x100000000 / 28) + 1)) >> 32 + + mulhwu returns the top 32 bits of the 64 bit result, doing the + shift for us in the same instruction. The top 32 bits are undefined, + so we have to mask them. */ + + lis r6,FX_FRACTION_1_28@h + ori r6,r6,FX_FRACTION_1_28@l + mulhwu r5,r4,r6 + clrldi r5,r5,32 + + /* Get our pointer into the invpio4_table array. */ + sldi r4,r5,3 + addi r6,r9,(L(invpio4_table)-L(anchor)) + add r4,r4,r6 + + lfd fp2,0(r4) + lfd fp3,8(r4) + lfd fp4,16(r4) + lfd fp5,24(r4) + + fmul fp6,fp2,fp1 + fmul fp7,fp3,fp1 + fmul fp8,fp4,fp1 + fmul fp9,fp5,fp1 + + /* Mask off larger integer bits in highest double word that we don't + care about to avoid losing precision when combining with smaller + values. */ + fctiduz fp10,fp6 + mfvsrd r7,v10 + rldicr r7,r7,0,(63-INTEGER_BITS) + mtvsrd v10,r7 + fcfidu fp10,fp10 /* Integer bits. */ + + fsub fp6,fp6,fp10 /* highest -= integer bits */ + + /* Work out the integer component, rounded down. Use the top two + limbs for this. */ + fadd fp10,fp6,fp7 /* highest + higher */ + + fctiduz fp10,fp10 + mfvsrd r7,v10 + andi. r0,r7,1 + fcfidu fp10,fp10 + + /* Subtract integer component from highest limb. */ + fsub fp12,fp6,fp10 + + beq L(even_integer) + + /* Our integer component is odd, so we are in the -PI/4 to 0 primary + region. 
We need to shift our result down by PI/4, and to do this
+	   in the mod (4/PI) space we simply subtract 1.  */
+	lfd	fp11,(L(DPone)-L(anchor))(r9)
+	fsub	fp12,fp12,fp11
+
+	/* Now add up all the limbs in order.  */
+	fadd	fp12,fp12,fp7
+	fadd	fp12,fp12,fp8
+	fadd	fp12,fp12,fp9
+
+	/* And finally multiply by pi/4.  */
+	lfd	fp13,(L(pio4)-L(anchor))(r9)
+	fmul	fp1,fp12,fp13
+
+	addi	r7,r7,1
+	b	L(reduced)
+
+L(even_integer):
+	lfd	fp11,(L(DPone)-L(anchor))(r9)
+
+	/* Now add up all the limbs in order.  */
+	fadd	fp12,fp12,fp7
+	fadd	fp12,fp12,fp8	/* Was "r12": FPR operand, spell it fp12.  */
+	fadd	fp12,fp12,fp9	/* Likewise.  */
+
+	/* We need to check if the addition of all the limbs resulted in us
+	   overflowing 1.0.  */
+	fcmpu	0,fp12,fp11
+	bgt	L(greater_than_one)
+
+	/* And finally multiply by pi/4.  */
+	lfd	fp13,(L(pio4)-L(anchor))(r9)
+	fmul	fp1,fp12,fp13
+
+	addi	r7,r7,1
+	b	L(reduced)
+
+L(greater_than_one):
+	/* We did overflow 1.0 when adding up all the limbs.  Add 1.0 to our
+	   integer, and subtract 1.0 from our result.  Since that makes the
+	   integer component odd, we need to subtract another 1.0 as
+	   explained above.  */
+	addi	r7,r7,1
+
+	lfd	fp11,(L(DPtwo)-L(anchor))(r9)
+	fsub	fp12,fp12,fp11
+
+	/* And finally multiply by pi/4.  */
+	lfd	fp13,(L(pio4)-L(anchor))(r9)
+	fmul	fp1,fp12,fp13
+
+	addi	r7,r7,1
+	b	L(reduced)
+
+	.balign 16
+L(less_2pn5):
+	lis	r4,TWO_PN27@h
+	ori	r4,r4,TWO_PN27@l
+
+	cmpw	r3,r4
+	blt	L(less_2pn27)
+
+	/* A simpler Chebyshev approximation is close enough for this range:
+	   1.0+x^2*(CC0+x^3*CC1).  */
+
+	lfd	fp10,(L(CC0)-L(anchor))(r9)
+	lfd	fp11,(L(CC1)-L(anchor))(r9)
+
+	fmul	fp2,fp1,fp1	/* x^2 */
+	fmul	fp3,fp2,fp1	/* x^3 */
+	lfd	fp1,(L(DPone)-L(anchor))(r9)
+
+	fmadd	fp4,fp3,fp11,fp10	/* CC0+x^3*CC1 */
+	fmadd	fp1,fp2,fp4,fp1	/* 1.0+x^2*(CC0+x^3*CC1) */
+
+	frsp	fp1,fp1	/* Round to single precision.  */
+
+	blr
+
+	.balign 16
+L(less_2pn27):
+	/* Handle some special cases:
+
+	   cosf(subnormal) raises inexact
+	   cosf(min_normalized) raises inexact
+	   cosf(normalized) raises inexact.
*/ + + lfd fp2,(L(DPone)-L(anchor))(r9) + + fabs fp1,fp1 /* |x| */ + fsub fp1,fp2,fp1 /* 1.0-|x| */ + + frsp fp1,fp1 + + blr + +END (__cosf) + + .section .rodata, "a" + + .balign 8 + +L(anchor): + + /* Chebyshev constants for sin, range -PI/4 - PI/4. */ +L(S0): .8byte 0xbfc5555555551cd9 +L(S1): .8byte 0x3f81111110c2688b +L(S2): .8byte 0xbf2a019f8b4bd1f9 +L(S3): .8byte 0x3ec71d7264e6b5b4 +L(S4): .8byte 0xbe5a947e1674b58a + + /* Chebyshev constants for cos, range 2^-27 - 2^-5. */ +L(CC0): .8byte 0xbfdfffffff5cc6fd +L(CC1): .8byte 0x3fa55514b178dac5 + + /* Chebyshev constants for cos, range -PI/4 - PI/4. */ +L(C0): .8byte 0xbfdffffffffe98ae +L(C1): .8byte 0x3fa55555545c50c7 +L(C2): .8byte 0xbf56c16b348b6874 +L(C3): .8byte 0x3efa00eb9ac43cc0 +L(C4): .8byte 0xbe923c97dd8844d7 + +L(invpio2): + .8byte 0x3fe45f306dc9c883 /* 2/PI */ + +L(invpio4): + .8byte 0x3ff45f306dc9c883 /* 4/PI */ + +L(invpio4_table): + .8byte 0x0000000000000000 + .8byte 0x3ff45f306c000000 + .8byte 0x3e3c9c882a000000 + .8byte 0x3c54fe13a8000000 + .8byte 0x3aaf47d4d0000000 + .8byte 0x38fbb81b6c000000 + .8byte 0x3714acc9e0000000 + .8byte 0x3560e4107c000000 + .8byte 0x33bca2c756000000 + .8byte 0x31fbd778ac000000 + .8byte 0x300b7246e0000000 + .8byte 0x2e5d2126e8000000 + .8byte 0x2c97003248000000 + .8byte 0x2ad77504e8000000 + .8byte 0x290921cfe0000000 + .8byte 0x274deb1cb0000000 + .8byte 0x25829a73e0000000 + .8byte 0x23fd1046be000000 + .8byte 0x2224baed10000000 + .8byte 0x20709d338e000000 + .8byte 0x1e535a2f80000000 + .8byte 0x1cef904e64000000 + .8byte 0x1b0d639830000000 + .8byte 0x1964ce7d24000000 + .8byte 0x17b908bf16000000 + +L(pio4): + .8byte 0x3fe921fb54442d18 /* PI/4 */ + +/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb + to avoid losing significant bits when multiplying with up to + (2^22)/(pi/2). 
*/ +L(pio2hi): + .8byte 0xbff921fb54400000 + +L(pio2lo): + .8byte 0xbdd0b4611a626332 + +L(pio2_table): + .8byte 0 + .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */ + .8byte 0x400921fb54442d18 /* 2 * PI/2 */ + .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */ + .8byte 0x401921fb54442d18 /* 4 * PI/2 */ + .8byte 0x401f6a7a2955385e /* 5 * PI/2 */ + .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */ + .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */ + .8byte 0x402921fb54442d18 /* 8 * PI/2 */ + .8byte 0x402c463abeccb2bb /* 9 * PI/2 */ + .8byte 0x402f6a7a2955385e /* 10 * PI/2 */ + +L(small): + .8byte 0x3cd0000000000000 /* 2^-50 */ + +L(ones): + .8byte 0x3ff0000000000000 /* +1.0 */ + .8byte 0xbff0000000000000 /* -1.0 */ + +L(DPhalf): + .8byte 0x3fe0000000000000 /* 0.5 */ + +L(DPone): + .8byte 0x3ff0000000000000 /* 1.0 */ + +L(DPtwo): + .8byte 0x4000000000000000 /* 2.0 */ + +weak_alias(__cosf, cosf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S new file mode 100644 index 0000000000..fcdcb60293 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finite.S @@ -0,0 +1,56 @@ +/* isfinite(). PowerPC64/POWER8 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
+
+/* int [r3] __finite ([fp1] x) */
+
+EALIGN (__finite, 4, 0)
+	CALL_MCOUNT 0
+	MFVSRD_R3_V1
+	lis     r9,0x8010
+	clrldi  r3,r3,1       /* r3 = r3 & 0x7fffffffffffffff (clear sign bit) */
+	rldicr  r9,r9,32,31   /* r9 = r9 << 32, i.e. 0x8010000000000000 */
+	add     r3,r3,r9      /* MSB of sum is set iff |x| < inf (no wrap).  */
+	rldicl  r3,r3,1,63    /* Extract that MSB: 1 if finite, 0 otherwise.  */
+	blr
+END (__finite)
+
+hidden_def (__finite)
+weak_alias (__finite, finite)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.  */
+strong_alias (__finite, __finitef)
+hidden_def (__finitef)
+weak_alias (__finitef, finitef)
+
+#if IS_IN (libm)
+# if LONG_DOUBLE_COMPAT (libm, GLIBC_2_0)
+compat_symbol (libm, __finite, __finitel, GLIBC_2_0)
+compat_symbol (libm, finite, finitel, GLIBC_2_0)
+# endif
+#else
+# if LONG_DOUBLE_COMPAT (libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S
new file mode 100644
index 0000000000..54bd94176d
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_finitef.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_finite.S.  */
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S
new file mode 100644
index 0000000000..32814e4525
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinf.S
@@ -0,0 +1,61 @@
+/* isinf().  PowerPC64/POWER8 version.
+   Copyright (C) 2014-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
+
+/* int [r3] __isinf([fp1] x)  */
+
+EALIGN (__isinf, 4, 0)
+	CALL_MCOUNT 0
+	MFVSRD_R3_V1
+	lis     r9,0x7ff0     /* r9 = 0x7ff0  */
+	rldicl  r10,r3,0,1    /* r10 = r3 & 0x7fffffffffffffff (clear sign bit) */
+	sldi    r9,r9,32      /* r9 = r9 << 32, i.e. 0x7ff0000000000000 (+inf) */
+	cmpd    cr7,r10,r9    /* fp1 & 0x7ff0000000000000 ?  */
+	beq     cr7,L(inf)
+	li      r3,0          /* Not inf */
+	blr
+L(inf):
+	sradi   r3,r3,63      /* r3 = r3 >> 63: 0 for +inf, -1 for -inf */
+	ori     r3,r3,1       /* r3 = r3 | 0x1: return +1 or -1 */
+	blr
+END (__isinf)
+
+hidden_def (__isinf)
+weak_alias (__isinf, isinf)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.  */
+strong_alias (__isinf, __isinff)
+hidden_def (__isinff)
+weak_alias (__isinff, isinff)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#if !IS_IN (libm)
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S
new file mode 100644
index 0000000000..be759e091e
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isinff.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isinf.S.
*/
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S
new file mode 100644
index 0000000000..af52e502b7
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnan.S
@@ -0,0 +1,56 @@
+/* isnan().  PowerPC64/POWER8 version.
+   Copyright (C) 2014-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
+
+/* int [r3] __isnan([f1] x)  */
+
+EALIGN (__isnan, 4, 0)
+	CALL_MCOUNT 0
+	MFVSRD_R3_V1
+	lis     r9,0x7ff0
+	clrldi  r3,r3,1       /* r3 = r3 & 0x7fffffffffffffff (clear sign bit) */
+	rldicr  r9,r9,32,31   /* r9 = r9 << 32, i.e. 0x7ff0000000000000 (+inf) */
+	subf    r3,r3,r9      /* inf - |x|: negative only when |x| > inf (NaN) */
+	rldicl  r3,r3,1,63    /* Extract the sign bit: 1 if NaN, else 0.  */
+	blr
+END (__isnan)
+
+hidden_def (__isnan)
+weak_alias (__isnan, isnan)
+
+/* It turns out that the 'double' version will also always work for
+   single-precision.
*/ +strong_alias (__isnan, __isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +#ifdef NO_LONG_DOUBLE +strong_alias (__isnan, __isnanl) +weak_alias (__isnan, isnanl) +#endif + +#if !IS_IN (libm) +# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0) +compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0); +compat_symbol (libc, isnan, isnanl, GLIBC_2_0); +# endif +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S new file mode 100644 index 0000000000..b48c85e0d3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_isnanf.S @@ -0,0 +1 @@ +/* This function uses the same code as s_isnan.S. */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S new file mode 100644 index 0000000000..aa180b6901 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llrint.S @@ -0,0 +1,45 @@ +/* Round double to long int. POWER8 PowerPC64 version. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
+
+/* long long int[r3] __llrint (double x[fp1])  */
+ENTRY (__llrint)
+	CALL_MCOUNT 0
+	fctid	fp1,fp1	/* Convert to integer DW using the current rounding mode.  */
+	MFVSRD_R3_V1	/* Move the integer result from the FP side to r3.  */
+	blr
+END (__llrint)
+
+strong_alias (__llrint, __lrint)
+weak_alias (__llrint, llrint)
+weak_alias (__lrint, lrint)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__llrint, __llrintl)
+weak_alias (__llrint, llrintl)
+strong_alias (__lrint, __lrintl)
+weak_alias (__lrint, lrintl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llrint, llrintl, GLIBC_2_1)
+compat_symbol (libm, __lrint, lrintl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S
new file mode 100644
index 0000000000..043fc6a089
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_llround.S
@@ -0,0 +1,48 @@
+/* llround function.  POWER8 PowerPC64 version.
+   Copyright (C) 2014-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/
+
+#include <sysdep.h>
+#include <endian.h>
+#include <math_ldbl_opt.h>
+
+#define MFVSRD_R3_V1  .long 0x7c230066     /* mfvsrd  r3,vs1  */
+
+/* long long [r3] llround (double x [fp1])  */
+
+ENTRY (__llround)
+	CALL_MCOUNT 0
+	frin	fp1,fp1	/* Round to nearest +-0.5.  */
+	fctidz	fp1,fp1	/* Convert To Integer DW round toward 0.  */
+	MFVSRD_R3_V1	/* Move the integer result from the FP side to r3.  */
+	blr
+END (__llround)
+
+strong_alias (__llround, __lround)
+weak_alias (__llround, llround)
+weak_alias (__lround, lround)
+
+#ifdef NO_LONG_DOUBLE
+weak_alias (__llround, llroundl)
+strong_alias (__llround, __llroundl)
+weak_alias (__lround, lroundl)
+strong_alias (__lround, __lroundl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
+compat_symbol (libm, __llround, llroundl, GLIBC_2_1)
+compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
+#endif
diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
new file mode 100644
index 0000000000..fb0add3462
--- /dev/null
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/fpu/s_sinf.S
@@ -0,0 +1,519 @@
+/* Optimized sinf().  PowerPC64/POWER8 version.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/ + +#include <sysdep.h> +#define _ERRNO_H 1 +#include <bits/errno.h> + +#define FRAMESIZE (FRAME_MIN_SIZE+16) + +#define FLOAT_EXPONENT_SHIFT 23 +#define FLOAT_EXPONENT_BIAS 127 +#define INTEGER_BITS 3 + +#define PI_4 0x3f490fdb /* PI/4 */ +#define NINEPI_4 0x40e231d6 /* 9 * PI/4 */ +#define TWO_PN5 0x3d000000 /* 2^-5 */ +#define TWO_PN27 0x32000000 /* 2^-27 */ +#define INFINITY 0x7f800000 +#define TWO_P23 0x4b000000 /* 2^27 */ +#define FX_FRACTION_1_28 0x9249250 /* 0x100000000 / 28 + 1 */ + + /* Implements the function + + float [fp1] sinf (float [fp1] x) */ + + .machine power8 +EALIGN(__sinf, 4, 0) + addis r9,r2,L(anchor)@toc@ha + addi r9,r9,L(anchor)@toc@l + + lis r4,PI_4@h + ori r4,r4,PI_4@l + + xscvdpspn v0,v1 + mfvsrd r8,v0 + rldicl r3,r8,32,33 /* Remove sign bit. */ + + cmpw r3,r4 + bge L(greater_or_equal_pio4) + + lis r4,TWO_PN5@h + ori r4,r4,TWO_PN5@l + + cmpw r3,r4 + blt L(less_2pn5) + + /* Chebyshev polynomial of the form: + * x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp1,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + frsp fp1,fp1 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_pio4): + lis r4,NINEPI_4@h + ori r4,r4,NINEPI_4@l + cmpw r3,r4 + bge L(greater_or_equal_9pio4) + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + fabs fp1,fp1 /* |x| */ + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + fctiduz fp2,fp2 + mfvsrd r3,v2 /* n = |x| mod PI/4 */ + + /* Now use that quotient to find |x| mod (PI/2). 
*/ + addi r7,r3,1 + rldicr r5,r7,2,60 /* ((n+1) >> 1) << 3 */ + addi r6,r9,(L(pio2_table)-L(anchor)) + lfdx fp4,r5,r6 + fsub fp1,fp1,fp4 + + .balign 16 +L(reduced): + /* Now we are in the range -PI/4 to PI/4. */ + + /* Work out if we are in a positive or negative primary interval. */ + rldicl r4,r7,62,63 /* ((n+1) >> 2) & 1 */ + + /* We are operating on |x|, so we need to add back the original + sign. */ + rldicl r8,r8,33,63 /* (x >> 31) & 1, ie the sign bit. */ + xor r4,r4,r8 /* 0 if result should be positive, + 1 if negative. */ + + /* Load a 1.0 or -1.0. */ + addi r5,r9,(L(ones)-L(anchor)) + sldi r4,r4,3 + lfdx fp0,r4,r5 + + /* Are we in the primary interval of sin or cos? */ + andi. r4,r7,0x2 + bne L(cos) + + /* Chebyshev polynomial of the form: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + + lfd fp9,(L(S0)-L(anchor))(r9) + lfd fp10,(L(S1)-L(anchor))(r9) + lfd fp11,(L(S2)-L(anchor))(r9) + lfd fp12,(L(S3)-L(anchor))(r9) + lfd fp13,(L(S4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + fmul fp3,fp2,fp1 /* x^3 */ + + fmadd fp4,fp2,fp13,fp12 /* S3+x^2*S4 */ + fmadd fp4,fp2,fp4,fp11 /* S2+x^2*(S3+x^2*S4) */ + fmadd fp4,fp2,fp4,fp10 /* S1+x^2*(S2+x^2*(S3+x^2*S4)) */ + fmadd fp4,fp2,fp4,fp9 /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))) */ + fmadd fp4,fp3,fp4,fp1 /* x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(cos): + /* Chebyshev polynomial of the form: + 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). 
*/ + + lfd fp9,(L(C0)-L(anchor))(r9) + lfd fp10,(L(C1)-L(anchor))(r9) + lfd fp11,(L(C2)-L(anchor))(r9) + lfd fp12,(L(C3)-L(anchor))(r9) + lfd fp13,(L(C4)-L(anchor))(r9) + + fmul fp2,fp1,fp1 /* x^2 */ + lfd fp3,(L(DPone)-L(anchor))(r9) + + fmadd fp4,fp2,fp13,fp12 /* C3+x^2*C4 */ + fmadd fp4,fp2,fp4,fp11 /* C2+x^2*(C3+x^2*C4) */ + fmadd fp4,fp2,fp4,fp10 /* C1+x^2*(C2+x^2*(C3+x^2*C4)) */ + fmadd fp4,fp2,fp4,fp9 /* C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4))) */ + fmadd fp4,fp2,fp4,fp3 /* 1.0 + x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))) */ + fmul fp4,fp4,fp0 /* Add in the sign. */ + frsp fp1,fp4 /* Round to single precision. */ + + blr + + .balign 16 +L(greater_or_equal_9pio4): + lis r4,INFINITY@h + ori r4,r4,INFINITY@l + cmpw r3,r4 + bge L(inf_or_nan) + + lis r4,TWO_P23@h + ori r4,r4,TWO_P23@l + cmpw r3,r4 + bge L(greater_or_equal_2p23) + + fabs fp1,fp1 /* |x| */ + + /* Calculate quotient of |x|/(PI/4). */ + lfd fp2,(L(invpio4)-L(anchor))(r9) + + lfd fp3,(L(DPone)-L(anchor))(r9) + lfd fp4,(L(DPhalf)-L(anchor))(r9) + fmul fp2,fp1,fp2 /* |x|/(PI/4) */ + friz fp2,fp2 /* n = floor(|x|/(PI/4)) */ + + /* Calculate (n + 1) / 2. */ + fadd fp2,fp2,fp3 /* n + 1 */ + fmul fp3,fp2,fp4 /* (n + 1) / 2 */ + friz fp3,fp3 + + lfd fp4,(L(pio2hi)-L(anchor))(r9) + lfd fp5,(L(pio2lo)-L(anchor))(r9) + + fmul fp6,fp4,fp3 + fadd fp6,fp6,fp1 + fmadd fp1,fp5,fp3,fp6 + + fctiduz fp2,fp2 + mfvsrd r7,v2 /* n + 1 */ + + b L(reduced) + + .balign 16 +L(inf_or_nan): + bne L(skip_errno_setting) /* Is a NAN? */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + stfd fp1,-8(r1) + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl JUMPTARGET(__errno_location) + nop + + /* Restore the stack frame. 
*/ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + + lfd fp1,-8(r1) + + /* errno = EDOM */ + li r4,EDOM + stw r4,0(r3) + +L(skip_errno_setting): + fsub fp1,fp1,fp1 /* x - x */ + blr + + .balign 16 +L(greater_or_equal_2p23): + fabs fp1,fp1 + + srwi r4,r3,FLOAT_EXPONENT_SHIFT + subi r4,r4,FLOAT_EXPONENT_BIAS + + /* We reduce the input modulo pi/4, so we need 3 bits of integer + to determine where in 2*pi we are. Index into our array + accordingly. */ + addi r4,r4,INTEGER_BITS + + /* To avoid an expensive divide, for the range we care about (0 - 127) + we can transform x/28 into: + + x/28 = (x * ((0x100000000 / 28) + 1)) >> 32 + + mulhwu returns the top 32 bits of the 64 bit result, doing the + shift for us in the same instruction. The top 32 bits are undefined, + so we have to mask them. */ + + lis r6,FX_FRACTION_1_28@h + ori r6,r6,FX_FRACTION_1_28@l + mulhwu r5,r4,r6 + clrldi r5,r5,32 + + /* Get our pointer into the invpio4_table array. */ + sldi r4,r5,3 + addi r6,r9,(L(invpio4_table)-L(anchor)) + add r4,r4,r6 + + lfd fp2,0(r4) + lfd fp3,8(r4) + lfd fp4,16(r4) + lfd fp5,24(r4) + + fmul fp6,fp2,fp1 + fmul fp7,fp3,fp1 + fmul fp8,fp4,fp1 + fmul fp9,fp5,fp1 + + /* Mask off larger integer bits in highest double word that we don't + care about to avoid losing precision when combining with smaller + values. */ + fctiduz fp10,fp6 + mfvsrd r7,v10 + rldicr r7,r7,0,(63-INTEGER_BITS) + mtvsrd v10,r7 + fcfidu fp10,fp10 /* Integer bits. */ + + fsub fp6,fp6,fp10 /* highest -= integer bits */ + + /* Work out the integer component, rounded down. Use the top two + limbs for this. */ + fadd fp10,fp6,fp7 /* highest + higher */ + + fctiduz fp10,fp10 + mfvsrd r7,v10 + andi. r0,r7,1 + fcfidu fp10,fp10 + + /* Subtract integer component from highest limb. */ + fsub fp12,fp6,fp10 + + beq L(even_integer) + + /* Our integer component is odd, so we are in the -PI/4 to 0 primary + region. 
We need to shift our result down by PI/4, and to do this
+	   in the mod (4/PI) space we simply subtract 1.  */
+	lfd	fp11,(L(DPone)-L(anchor))(r9)
+	fsub	fp12,fp12,fp11
+
+	/* Now add up all the limbs in order.  */
+	fadd	fp12,fp12,fp7
+	fadd	fp12,fp12,fp8
+	fadd	fp12,fp12,fp9
+
+	/* And finally multiply by pi/4.  */
+	lfd	fp13,(L(pio4)-L(anchor))(r9)
+	fmul	fp1,fp12,fp13
+
+	addi	r7,r7,1
+	b	L(reduced)
+
+L(even_integer):
+	lfd	fp11,(L(DPone)-L(anchor))(r9)
+
+	/* Now add up all the limbs in order.  */
+	fadd	fp12,fp12,fp7
+	fadd	fp12,fp12,fp8	/* Was "r12": FPR operand, spell it fp12.  */
+	fadd	fp12,fp12,fp9	/* Likewise.  */
+
+	/* We need to check if the addition of all the limbs resulted in us
+	   overflowing 1.0.  */
+	fcmpu	0,fp12,fp11
+	bgt	L(greater_than_one)
+
+	/* And finally multiply by pi/4.  */
+	lfd	fp13,(L(pio4)-L(anchor))(r9)
+	fmul	fp1,fp12,fp13
+
+	addi	r7,r7,1
+	b	L(reduced)
+
+L(greater_than_one):
+	/* We did overflow 1.0 when adding up all the limbs.  Add 1.0 to our
+	   integer, and subtract 1.0 from our result.  Since that makes the
+	   integer component odd, we need to subtract another 1.0 as
+	   explained above.  */
+	addi	r7,r7,1
+
+	lfd	fp11,(L(DPtwo)-L(anchor))(r9)
+	fsub	fp12,fp12,fp11
+
+	/* And finally multiply by pi/4.  */
+	lfd	fp13,(L(pio4)-L(anchor))(r9)
+	fmul	fp1,fp12,fp13
+
+	addi	r7,r7,1
+	b	L(reduced)
+
+	.balign 16
+L(less_2pn5):
+	lis	r4,TWO_PN27@h
+	ori	r4,r4,TWO_PN27@l
+
+	cmpw	r3,r4
+	blt	L(less_2pn27)
+
+	/* A simpler Chebyshev approximation is close enough for this range:
+	   x+x^3*(SS0+x^2*SS1).  */
+
+	lfd	fp10,(L(SS0)-L(anchor))(r9)
+	lfd	fp11,(L(SS1)-L(anchor))(r9)
+
+	fmul	fp2,fp1,fp1	/* x^2 */
+	fmul	fp3,fp2,fp1	/* x^3 */
+
+	fmadd	fp4,fp2,fp11,fp10	/* SS0+x^2*SS1 */
+	fmadd	fp1,fp3,fp4,fp1	/* x+x^3*(SS0+x^2*SS1) */
+
+	frsp	fp1,fp1	/* Round to single precision.  */
+
+	blr
+
+	.balign 16
+L(less_2pn27):
+	cmpwi	r3,0
+	beq	L(zero)
+
+	/* Handle some special cases:
+
+	   sinf(subnormal) raises inexact/underflow
+	   sinf(min_normalized) raises inexact/underflow
+	   sinf(normalized) raises inexact.
*/ + + lfd fp2,(L(small)-L(anchor))(r9) + + fmul fp2,fp1,fp2 /* x * small */ + fsub fp1,fp1,fp2 /* x - x * small */ + + frsp fp1,fp1 + + blr + + .balign 16 +L(zero): + blr + +END (__sinf) + + .section .rodata, "a" + + .balign 8 + +L(anchor): + + /* Chebyshev constants for sin, range -PI/4 - PI/4. */ +L(S0): .8byte 0xbfc5555555551cd9 +L(S1): .8byte 0x3f81111110c2688b +L(S2): .8byte 0xbf2a019f8b4bd1f9 +L(S3): .8byte 0x3ec71d7264e6b5b4 +L(S4): .8byte 0xbe5a947e1674b58a + + /* Chebyshev constants for sin, range 2^-27 - 2^-5. */ +L(SS0): .8byte 0xbfc555555543d49d +L(SS1): .8byte 0x3f8110f475cec8c5 + + /* Chebyshev constants for cos, range -PI/4 - PI/4. */ +L(C0): .8byte 0xbfdffffffffe98ae +L(C1): .8byte 0x3fa55555545c50c7 +L(C2): .8byte 0xbf56c16b348b6874 +L(C3): .8byte 0x3efa00eb9ac43cc0 +L(C4): .8byte 0xbe923c97dd8844d7 + +L(invpio2): + .8byte 0x3fe45f306dc9c883 /* 2/PI */ + +L(invpio4): + .8byte 0x3ff45f306dc9c883 /* 4/PI */ + +L(invpio4_table): + .8byte 0x0000000000000000 + .8byte 0x3ff45f306c000000 + .8byte 0x3e3c9c882a000000 + .8byte 0x3c54fe13a8000000 + .8byte 0x3aaf47d4d0000000 + .8byte 0x38fbb81b6c000000 + .8byte 0x3714acc9e0000000 + .8byte 0x3560e4107c000000 + .8byte 0x33bca2c756000000 + .8byte 0x31fbd778ac000000 + .8byte 0x300b7246e0000000 + .8byte 0x2e5d2126e8000000 + .8byte 0x2c97003248000000 + .8byte 0x2ad77504e8000000 + .8byte 0x290921cfe0000000 + .8byte 0x274deb1cb0000000 + .8byte 0x25829a73e0000000 + .8byte 0x23fd1046be000000 + .8byte 0x2224baed10000000 + .8byte 0x20709d338e000000 + .8byte 0x1e535a2f80000000 + .8byte 0x1cef904e64000000 + .8byte 0x1b0d639830000000 + .8byte 0x1964ce7d24000000 + .8byte 0x17b908bf16000000 + +L(pio4): + .8byte 0x3fe921fb54442d18 /* PI/4 */ + +/* PI/2 as a sum of two doubles. We only use 32 bits of the upper limb + to avoid losing significant bits when multiplying with up to + (2^22)/(pi/2). 
*/ +L(pio2hi): + .8byte 0xbff921fb54400000 + +L(pio2lo): + .8byte 0xbdd0b4611a626332 + +L(pio2_table): + .8byte 0 + .8byte 0x3ff921fb54442d18 /* 1 * PI/2 */ + .8byte 0x400921fb54442d18 /* 2 * PI/2 */ + .8byte 0x4012d97c7f3321d2 /* 3 * PI/2 */ + .8byte 0x401921fb54442d18 /* 4 * PI/2 */ + .8byte 0x401f6a7a2955385e /* 5 * PI/2 */ + .8byte 0x4022d97c7f3321d2 /* 6 * PI/2 */ + .8byte 0x4025fdbbe9bba775 /* 7 * PI/2 */ + .8byte 0x402921fb54442d18 /* 8 * PI/2 */ + .8byte 0x402c463abeccb2bb /* 9 * PI/2 */ + .8byte 0x402f6a7a2955385e /* 10 * PI/2 */ + +L(small): + .8byte 0x3cd0000000000000 /* 2^-50 */ + +L(ones): + .8byte 0x3ff0000000000000 /* +1.0 */ + .8byte 0xbff0000000000000 /* -1.0 */ + +L(DPhalf): + .8byte 0x3fe0000000000000 /* 0.5 */ + +L(DPone): + .8byte 0x3ff0000000000000 /* 1.0 */ + +L(DPtwo): + .8byte 0x4000000000000000 /* 2.0 */ + +weak_alias(__sinf, sinf) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S new file mode 100644 index 0000000000..46b9c0067a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memcmp.S @@ -0,0 +1,1447 @@ +/* Optimized memcmp implementation for POWER7/PowerPC64. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* int [r3] memcmp (const char *s1 [r3], + const char *s2 [r4], + size_t size [r5]) */ + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#ifndef MEMCMP +# define MEMCMP memcmp +#endif + .machine power7 +EALIGN (MEMCMP, 4, 0) + CALL_MCOUNT 3 + +#define rRTN r3 +#define rSTR1 r3 /* First string arg. */ +#define rSTR2 r4 /* Second string arg. */ +#define rN r5 /* Max string length. */ +#define rWORD1 r6 /* Current word in s1. */ +#define rWORD2 r7 /* Current word in s2. */ +#define rWORD3 r8 /* Next word in s1. */ +#define rWORD4 r9 /* Next word in s2. */ +#define rWORD5 r10 /* Next word in s1. */ +#define rWORD6 r11 /* Next word in s2. */ + +#define rOFF8 r20 /* 8 bytes offset. */ +#define rOFF16 r21 /* 16 bytes offset. */ +#define rOFF24 r22 /* 24 bytes offset. */ +#define rOFF32 r23 /* 24 bytes offset. */ +#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */ +#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */ +#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */ +#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */ +#define rSHR r28 /* Unaligned shift right count. */ +#define rSHL r29 /* Unaligned shift left count. */ +#define rWORD7 r30 /* Next word in s1. */ +#define rWORD8 r31 /* Next word in s2. */ + +#define rWORD8SAVE (-8) +#define rWORD7SAVE (-16) +#define rOFF8SAVE (-24) +#define rOFF16SAVE (-32) +#define rOFF24SAVE (-40) +#define rOFF32SAVE (-48) +#define rSHRSAVE (-56) +#define rSHLSAVE (-64) +#define rWORD8SHIFTSAVE (-72) +#define rWORD2SHIFTSAVE (-80) +#define rWORD4SHIFTSAVE (-88) +#define rWORD6SHIFTSAVE (-96) + +#ifdef __LITTLE_ENDIAN__ +# define LD ldbrx +#else +# define LD ldx +#endif + + xor r10, rSTR2, rSTR1 + cmpldi cr6, rN, 0 + cmpldi cr1, rN, 8 + clrldi. 
r0, r10, 61 + clrldi r12, rSTR1, 61 + cmpldi cr5, r12, 0 + beq- cr6, L(zeroLength) + dcbt 0, rSTR1 + dcbt 0, rSTR2 + /* If less than 8 bytes or not aligned, use the unaligned + byte loop. */ + blt cr1, L(bytealigned) + bne L(unalignedqw) +/* At this point we know both strings have the same alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then we are already double word + aligned and can perform the DW aligned loop. */ + + .align 4 +L(samealignment): + or r11, rSTR2, rSTR1 + clrldi. r11, r11, 60 + beq L(qw_align) + /* Try to align to QW else proceed to DW loop. */ + clrldi. r10, r10, 60 + bne L(DW) + /* For the difference to reach QW alignment, load as DW. */ + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + subfic r10, r12, 8 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + sldi r9, r10, 3 + subfic r9, r9, 64 + sld rWORD1, rWORD1, r9 + sld rWORD2, rWORD2, r9 + cmpld cr6, rWORD1, rWORD2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(ret_diff) + subf rN, r10, rN + + cmpld cr6, r11, r12 + bgt cr6, L(qw_align) + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr6, rWORD1, rWORD2 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(different) + cmpldi cr6, rN, 8 + ble cr6, L(zeroLength) + addi rN, rN, -8 + /* Now both rSTR1 and rSTR2 are aligned to QW. */ + .align 4 +L(qw_align): + vspltisb v0, 0 + srdi. r6, rN, 6 + li r8, 16 + li r10, 32 + li r11, 48 + ble cr0, L(lessthan64) + mtctr r6 + vspltisb v8, 0 + vspltisb v6, 0 + /* Aligned vector loop. */ + .align 4 +L(aligned_loop): + lvx v4, 0, rSTR1 + lvx v5, 0, rSTR2 + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + lvx v6, rSTR1, r8 + lvx v8, rSTR2, r8 + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + lvx v4, rSTR1, r10 + lvx v5, rSTR2, r10 + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + lvx v6, rSTR1, r11 + lvx v8, rSTR2, r11 + vcmpequb. 
v7, v5, v4 + bnl cr6, L(different2) + addi rSTR1, rSTR1, 64 + addi rSTR2, rSTR2, 64 + bdnz L(aligned_loop) + vcmpequb. v7, v6, v8 + bnl cr6, L(different3) + clrldi rN, rN, 58 + /* Handle remainder for aligned loop. */ + .align 4 +L(lessthan64): + mr r9, rSTR1 + cmpdi cr6, rN, 0 + li rSTR1, 0 + blelr cr6 + lvx v4, 0, r9 + lvx v5, 0, rSTR2 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r8 + lvx v5, rSTR2, r8 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r10 + lvx v5, rSTR2, r10 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r11 + lvx v5, rSTR2, r11 + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + blr + + /* Calculate and return the difference. */ + .align 4 +L(different1): + cmpdi cr6, rN, 16 + bge cr6, L(different2) + /* Discard unwanted bytes. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v1, 0, rN + vperm v4, v4, v0, v1 + vperm v5, v5, v0, v1 +#else + lvsl v1, 0, rN + vperm v4, v0, v4, v1 + vperm v5, v0, v5, v1 +#endif + vcmpequb. v7, v4, v5 + li rRTN, 0 + bltlr cr6 + .align 4 +L(different2): +#ifdef __LITTLE_ENDIAN__ + /* Reverse bytes for direct comparison. */ + lvsl v10, r0, r0 + vspltisb v8, 15 + vsububm v9, v8, v10 + vperm v4, v4, v0, v9 + vperm v5, v5, v0, v9 +#endif + MFVRD(r7, v4) + MFVRD(r9, v5) + cmpld cr6, r7, r9 + bne cr6, L(ret_diff) + /* Difference in second DW. */ + vsldoi v4, v4, v4, 8 + vsldoi v5, v5, v5, 8 + MFVRD(r7, v4) + MFVRD(r9, v5) + cmpld cr6, r7, r9 +L(ret_diff): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(different3): +#ifdef __LITTLE_ENDIAN__ + /* Reverse bytes for direct comparison. */ + vspltisb v9, 15 + lvsl v10, r0, r0 + vsububm v9, v9, v10 + vperm v6, v6, v0, v9 + vperm v8, v8, v0, v9 +#endif + MFVRD(r7, v6) + MFVRD(r9, v8) + cmpld cr6, r7, r9 + bne cr6, L(ret_diff) + /* Difference in second DW. 
*/ + vsldoi v6, v6, v6, 8 + vsldoi v8, v8, v8, 8 + MFVRD(r7, v6) + MFVRD(r9, v8) + cmpld cr6, r7, r9 + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + + .align 4 +L(different): + cmpldi cr7, rN, 8 + bgt cr7, L(end) + /* Skip unwanted bytes. */ + sldi r8, rN, 3 + subfic r8, r8, 64 + srd rWORD1, rWORD1, r8 + srd rWORD2, rWORD2, r8 + cmpld cr6, rWORD1, rWORD2 + li rRTN, 0 + beqlr cr6 +L(end): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + + .align 4 +L(unalignedqw): + /* Proceed to DW unaligned loop,if there is a chance of pagecross. */ + rldicl r9, rSTR1, 0, 52 + add r9, r9, rN + cmpldi cr0, r9, 4096-16 + bgt cr0, L(unaligned) + rldicl r9, rSTR2, 0, 52 + add r9, r9, rN + cmpldi cr0, r9, 4096-16 + bgt cr0, L(unaligned) + li r0, 0 + li r8, 16 + vspltisb v0, 0 + /* Check if rSTR1 is aligned to QW. */ + andi. r11, rSTR1, 0xF + beq L(s1_align) + + /* Compare 16B and align S1 to QW. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v10, 0, rSTR1 /* Compute mask. */ + lvsr v6, 0, rSTR2 /* Compute mask. */ +#else + lvsl v10, 0, rSTR1 /* Compute mask. */ + lvsl v6, 0, rSTR2 /* Compute mask. */ +#endif + lvx v5, 0, rSTR2 + lvx v9, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v6 +#else + vperm v5, v5, v9, v6 +#endif + lvx v4, 0, rSTR1 + lvx v9, rSTR1, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v9, v4, v10 +#else + vperm v4, v4, v9, v10 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + cmpldi cr6, rN, 16 + ble cr6, L(zeroLength) + subfic r11, r11, 16 + subf rN, r11, rN + add rSTR1, rSTR1, r11 + add rSTR2, rSTR2, r11 + + /* As s1 is QW aligned prepare for unaligned loop. */ + .align 4 +L(s1_align): +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 +#else + lvsl v6, 0, rSTR2 +#endif + lvx v5, 0, rSTR2 + srdi. r6, rN, 6 + li r10, 32 + li r11, 48 + ble cr0, L(lessthan64_unalign) + mtctr r6 + li r9, 64 + /* Unaligned vector loop. 
*/ + .align 4 +L(unalign_qwloop): + lvx v4, 0, rSTR1 + lvx v10, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r8 + lvx v10, rSTR2, r10 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r10 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + lvx v4, rSTR1, r11 + lvx v10, rSTR2, r9 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different2) + vor v5, v10, v10 + addi rSTR1, rSTR1, 64 + addi rSTR2, rSTR2, 64 + bdnz L(unalign_qwloop) + clrldi rN, rN, 58 + /* Handle remainder for unaligned loop. */ + .align 4 +L(lessthan64_unalign): + mr r9, rSTR1 + cmpdi cr6, rN, 0 + li rSTR1, 0 + blelr cr6 + lvx v4, 0, r9 + lvx v10, rSTR2, r8 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r8 + lvx v10, rSTR2, r10 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r10 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. v7, v5, v4 + bnl cr6, L(different1) + vor v5, v10, v10 + addi rN, rN, -16 + + cmpdi cr6, rN, 0 + blelr cr6 + lvx v4, r9, r11 + addi r11, r11, 16 + lvx v10, rSTR2, r11 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v10, v5, v6 +#else + vperm v5, v5, v10, v6 +#endif + vcmpequb. 
v7, v5, v4 + bnl cr6, L(different1) + blr + +/* Otherwise we know the two strings have the same alignment (but not + yet DW). So we force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DW aligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ + .align 4 +L(DW): + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + clrrdi rSTR1, rSTR1, 3 + clrrdi rSTR2, rSTR2, 3 + beq cr5, L(DWaligned) + add rN, rN, r12 + sldi rWORD6, r12, 3 + srdi r0, rN, 5 /* Divide by 32. */ + andi. r12, rN, 24 /* Get the DW remainder. */ + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dPs4) + mtctr r0 + bgt cr1, L(dPs3) + beq cr1, L(dPs2) + +/* Remainder is 8. */ + .align 3 +L(dsP1): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) +/* Do something useful in this cycle since we have to branch anyway. */ + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + b L(dP1e) +/* Remainder is 16. */ + .align 4 +L(dPs2): + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD2, rWORD6 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) +/* Do something useful in this cycle since we have to branch anyway. 
*/ + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + b L(dP2e) +/* Remainder is 24. */ + .align 4 +L(dPs3): + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD2, rWORD6 + cmpld cr1, rWORD3, rWORD4 + b L(dP3e) +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(dPs4): + mtctr r0 + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD2, rWORD6 + cmpld cr7, rWORD1, rWORD2 + b L(dP4e) + +/* At this point we know both strings are double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWaligned): + andi. r12, rN, 24 /* Get the DW remainder. */ + srdi r0, rN, 5 /* Divide by 32. */ + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + beq L(dP4) + bgt cr1, L(dP3) + beq cr1, L(dP2) + +/* Remainder is 8. */ + .align 4 +L(dP1): + mtctr r0 +/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not + change any on the early exit path. The key here is the non-early + exit path only cares about the condition code (cr5), not about which + register pair was used. */ + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr5, rWORD5, rWORD6 + blt cr7, L(dP1x) + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP1e): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5x) + bne cr7, L(dLcr7x) + + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr5, rWORD7, rWORD8 + bdnz L(dLoop) + bne cr6, L(dLcr6) + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + .align 3 +L(dP1x): + sldi. r12, rN, 3 + bne cr5, L(dLcr5x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 16. */ + .align 4 +L(dP2): + mtctr r0 + LD rWORD5, 0, rSTR1 + LD rWORD6, 0, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP2x) + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 +L(dP2e): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(dLcr6) + bne cr5, L(dLcr5) + b L(dLoop2) + .align 4 +L(dP2x): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + sldi. r12, rN, 3 + bne cr6, L(dLcr6x) + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr1, L(dLcr1x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Remainder is 24. */ + .align 4 +L(dP3): + mtctr r0 + LD rWORD3, 0, rSTR1 + LD rWORD4, 0, rSTR2 + cmpld cr1, rWORD3, rWORD4 +L(dP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + blt cr7, L(dP3x) + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr1, L(dLcr1) + bne cr6, L(dLcr6) + b L(dLoop1) +/* Again we are on a early exit path (24-31 byte compare), we want to + only use volatile registers and avoid restoring non-volatile + registers. */ + .align 4 +L(dP3x): + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + sldi. r12, rN, 3 + bne cr1, L(dLcr1x) + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + bne cr6, L(dLcr6x) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). 
*/ + bne cr7, L(dLcr7x) + bne L(d00) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(dP4): + mtctr r0 + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpld cr7, rWORD1, rWORD2 +L(dP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) + bne cr1, L(dLcr1) + bdz- L(d24) /* Adjust CTR as we start with +4. */ +/* This is the primary loop. */ + .align 4 +L(dLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) +L(dLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) +L(dLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(dLcr7) +L(dLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + bne cr1, L(dLcr1) + cmpld cr7, rWORD1, rWORD2 + bdnz L(dLoop) + +L(dL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(dLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(dLcr5) + cmpld cr5, rWORD7, rWORD8 +L(d44): + bne cr7, L(dLcr7) +L(d34): + bne cr1, L(dLcr1) +L(d24): + bne cr6, L(dLcr6) +L(d14): + sldi. r12, rN, 3 + bne cr5, L(dLcr5) +L(d04): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */ + beq L(duzeroLength) +/* At this point we have a remainder of 1 to 7 bytes to compare. Since + we are aligned it is safe to load the whole double word, and use + shift right double to eliminate bits beyond the compare length. 
*/ +L(d00): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + cmpld cr7, rWORD1, rWORD2 + bne cr7, L(dLcr7x) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + + .align 4 +L(dLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr7x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(dLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr1x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr + .align 4 +L(dLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr6x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + .align 4 +L(dLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dLcr5x): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 1 + bgtlr cr5 + li rRTN, -1 + blr + + .align 4 +L(bytealigned): + mtctr rN + +/* We need to prime this loop. This loop is swing modulo scheduled + to avoid pipe delays. The dependent instruction latencies (load to + compare to conditional branch) is 2 to 3 cycles. In this loop each + dispatch group ends in a branch and takes 1 cycle. Effectively + the first iteration of the loop only serves to load operands and + branches based on compares are delayed until the next loop. + + So we must precondition some registers and condition codes so that + we don't exit the loop early on the first iteration. 
*/ + + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + bdz L(b11) + cmpld cr7, rWORD1, rWORD2 + lbz rWORD3, 1(rSTR1) + lbz rWORD4, 1(rSTR2) + bdz L(b12) + cmpld cr1, rWORD3, rWORD4 + lbzu rWORD5, 2(rSTR1) + lbzu rWORD6, 2(rSTR2) + bdz L(b13) + .align 4 +L(bLoop): + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + bne cr7, L(bLcr7) + + cmpld cr6, rWORD5, rWORD6 + bdz L(b3i) + + lbzu rWORD3, 1(rSTR1) + lbzu rWORD4, 1(rSTR2) + bne cr1, L(bLcr1) + + cmpld cr7, rWORD1, rWORD2 + bdz L(b2i) + + lbzu rWORD5, 1(rSTR1) + lbzu rWORD6, 1(rSTR2) + bne cr6, L(bLcr6) + + cmpld cr1, rWORD3, rWORD4 + bdnz L(bLoop) + +/* We speculatively loading bytes before we have tested the previous + bytes. But we must avoid overrunning the length (in the ctr) to + prevent these speculative loads from causing a segfault. In this + case the loop will exit early (before the all pending bytes are + tested. In this case we must complete the pending operations + before returning. */ +L(b1i): + bne cr7, L(bLcr7) + bne cr1, L(bLcr1) + b L(bx56) + .align 4 +L(b2i): + bne cr6, L(bLcr6) + bne cr7, L(bLcr7) + b L(bx34) + .align 4 +L(b3i): + bne cr1, L(bLcr1) + bne cr6, L(bLcr6) + b L(bx12) + .align 4 +L(bLcr7): + li rRTN, 1 + bgtlr cr7 + li rRTN, -1 + blr +L(bLcr1): + li rRTN, 1 + bgtlr cr1 + li rRTN, -1 + blr +L(bLcr6): + li rRTN, 1 + bgtlr cr6 + li rRTN, -1 + blr + +L(b13): + bne cr7, L(bx12) + bne cr1, L(bx34) +L(bx56): + sub rRTN, rWORD5, rWORD6 + blr + nop +L(b12): + bne cr7, L(bx12) +L(bx34): + sub rRTN, rWORD3, rWORD4 + blr +L(b11): +L(bx12): + sub rRTN, rWORD1, rWORD2 + blr + + .align 4 +L(zeroLength): + li rRTN, 0 + blr + + .align 4 +/* At this point we know the strings have different alignment and the + compare length is at least 8 bytes. r12 contains the low order + 3 bits of rSTR1 and cr5 contains the result of the logical compare + of r12 to 0. If r12 == 0 then rStr1 is double word + aligned and can perform the DWunaligned loop. + + Otherwise we know that rSTR1 is not already DW aligned yet. 
+ So we can force the string addresses to the next lower DW + boundary and special case this first DW using shift left to + eliminate bits preceding the first byte. Since we want to join the + normal (DWaligned) compare loop, starting at the second double word, + we need to adjust the length (rN) and special case the loop + versioning for the first DW. This ensures that the loop count is + correct and the first DW (shifted) is in the expected register pair. */ +L(unaligned): + std rWORD8, rWORD8SAVE(r1) + std rWORD7, rWORD7SAVE(r1) + std rOFF8, rOFF8SAVE(r1) + std rOFF16, rOFF16SAVE(r1) + std rOFF24, rOFF24SAVE(r1) + std rOFF32, rOFF32SAVE(r1) + cfi_offset(rWORD8, rWORD8SAVE) + cfi_offset(rWORD7, rWORD7SAVE) + cfi_offset(rOFF8, rOFF8SAVE) + cfi_offset(rOFF16, rOFF16SAVE) + cfi_offset(rOFF24, rOFF24SAVE) + cfi_offset(rOFF32, rOFF32SAVE) + li rOFF8,8 + li rOFF16,16 + li rOFF24,24 + li rOFF32,32 + std rSHL, rSHLSAVE(r1) + cfi_offset(rSHL, rSHLSAVE) + clrldi rSHL, rSTR2, 61 + beq cr6, L(duzeroLength) + std rSHR, rSHRSAVE(r1) + cfi_offset(rSHR, rSHRSAVE) + beq cr5, L(DWunaligned) + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) +/* Adjust the logical start of rSTR2 to compensate for the extra bits + in the 1st rSTR1 DW. */ + sub rWORD8_SHIFT, rSTR2, r12 +/* But do not attempt to address the DW before that DW that contains + the actual start of rSTR2. */ + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) +/* Compute the left/right shift counts for the unaligned rSTR2, + compensating for the logical (DW aligned) start of rSTR1. 
*/ + clrldi rSHL, rWORD8_SHIFT, 61 + clrrdi rSTR1, rSTR1, 3 + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + sldi rSHL, rSHL, 3 + cmpld cr5, rWORD8_SHIFT, rSTR2 + add rN, rN, r12 + sldi rWORD6, r12, 3 + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + subfic rSHR, rSHL, 64 + srdi r0, rN, 5 /* Divide by 32. */ + andi. r12, rN, 24 /* Get the DW remainder. */ +/* We normally need to load 2 DWs to start the unaligned rSTR2, but in + this special case those bits may be discarded anyway. Also we + must avoid loading a DW where none of the bits are part of rSTR2 as + this may cross a page boundary and cause a page fault. */ + li rWORD8, 0 + blt cr5, L(dus0) + LD rWORD8, 0, rSTR2 + addi rSTR2, rSTR2, 8 + sld rWORD8, rWORD8, rSHL + +L(dus0): + LD rWORD1, 0, rSTR1 + LD rWORD2, 0, rSTR2 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + srd r12, rWORD2, rSHR + clrldi rN, rN, 61 + beq L(duPs4) + mtctr r0 + or rWORD8, r12, rWORD8 + bgt cr1, L(duPs3) + beq cr1, L(duPs2) + +/* Remainder is 8. */ + .align 4 +L(dusP1): + sld rWORD8_SHIFT, rWORD2, rSHL + sld rWORD7, rWORD1, rWORD6 + sld rWORD8, rWORD8, rWORD6 + bge cr7, L(duP1e) +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. */ + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16. */ + .align 4 +L(duPs2): + sld rWORD6_SHIFT, rWORD2, rSHL + sld rWORD5, rWORD1, rWORD6 + sld rWORD6, rWORD8, rWORD6 + b L(duP2e) +/* Remainder is 24. */ + .align 4 +L(duPs3): + sld rWORD4_SHIFT, rWORD2, rSHL + sld rWORD3, rWORD1, rWORD6 + sld rWORD4, rWORD8, rWORD6 + b L(duP3e) +/* Count is a multiple of 32, remainder is 0. 
*/ + .align 4 +L(duPs4): + mtctr r0 + or rWORD8, r12, rWORD8 + sld rWORD2_SHIFT, rWORD2, rSHL + sld rWORD1, rWORD1, rWORD6 + sld rWORD2, rWORD8, rWORD6 + b L(duP4e) + +/* At this point we know rSTR1 is double word aligned and the + compare length is at least 8 bytes. */ + .align 4 +L(DWunaligned): + std rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + clrrdi rSTR2, rSTR2, 3 + std rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + srdi r0, rN, 5 /* Divide by 32. */ + std rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + andi. r12, rN, 24 /* Get the DW remainder. */ + std rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + cfi_offset(rWORD8_SHIFT, rWORD8SHIFTSAVE) + cfi_offset(rWORD2_SHIFT, rWORD2SHIFTSAVE) + cfi_offset(rWORD4_SHIFT, rWORD4SHIFTSAVE) + cfi_offset(rWORD6_SHIFT, rWORD6SHIFTSAVE) + sldi rSHL, rSHL, 3 + LD rWORD6, 0, rSTR2 + LD rWORD8, rOFF8, rSTR2 + addi rSTR2, rSTR2, 8 + cmpldi cr1, r12, 16 + cmpldi cr7, rN, 32 + clrldi rN, rN, 61 + subfic rSHR, rSHL, 64 + sld rWORD6_SHIFT, rWORD6, rSHL + beq L(duP4) + mtctr r0 + bgt cr1, L(duP3) + beq cr1, L(duP2) + +/* Remainder is 8. */ + .align 4 +L(duP1): + srd r12, rWORD8, rSHR + LD rWORD7, 0, rSTR1 + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP1x) +L(duP1e): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr5, rWORD7, rWORD8 + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + bne cr5, L(duLcr5) + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + bne cr7, L(duLcr7) + or rWORD6, r0, rWORD4_SHIFT + cmpld cr6, rWORD5, rWORD6 + b L(duLoop3) + .align 4 +/* At this point we exit early with the first double word compare + complete and remainder of 0 to 7 bytes. See L(du14) for details on + how we handle the remaining bytes. 
*/ +L(duP1x): + cmpld cr5, rWORD7, rWORD8 + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) +/* Remainder is 16. */ + .align 4 +L(duP2): + srd r0, rWORD8, rSHR + LD rWORD5, 0, rSTR1 + or rWORD6, r0, rWORD6_SHIFT + sld rWORD6_SHIFT, rWORD8, rSHL +L(duP2e): + LD rWORD7, rOFF8, rSTR1 + LD rWORD8, rOFF8, rSTR2 + cmpld cr6, rWORD5, rWORD6 + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP2x) + LD rWORD1, rOFF16, rSTR1 + LD rWORD2, rOFF16, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + LD rWORD3, rOFF24, rSTR1 + LD rWORD4, rOFF24, rSTR2 + cmpld cr7, rWORD1, rWORD2 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + cmpld cr1, rWORD3, rWORD4 + b L(duLoop2) + .align 4 +L(duP2x): + cmpld cr5, rWORD7, rWORD8 + addi rSTR1, rSTR1, 8 + addi rSTR2, rSTR2, 8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Remainder is 24. 
*/ + .align 4 +L(duP3): + srd r12, rWORD8, rSHR + LD rWORD3, 0, rSTR1 + sld rWORD4_SHIFT, rWORD8, rSHL + or rWORD4, r12, rWORD6_SHIFT +L(duP3e): + LD rWORD5, rOFF8, rSTR1 + LD rWORD6, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF16, rSTR1 + LD rWORD8, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + blt cr7, L(duP3x) + LD rWORD1, rOFF24, rSTR1 + LD rWORD2, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr7, rWORD1, rWORD2 + b L(duLoop1) + .align 4 +L(duP3x): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 + cmpld cr5, rWORD7, rWORD8 + bne cr6, L(duLcr6) + sldi. rN, rN, 3 + bne cr5, L(duLcr5) + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + b L(dutrim) + +/* Count is a multiple of 32, remainder is 0. */ + .align 4 +L(duP4): + mtctr r0 + srd r0, rWORD8, rSHR + LD rWORD1, 0, rSTR1 + sld rWORD2_SHIFT, rWORD8, rSHL + or rWORD2, r0, rWORD6_SHIFT +L(duP4e): + LD rWORD3, rOFF8, rSTR1 + LD rWORD4, rOFF8, rSTR2 + cmpld cr7, rWORD1, rWORD2 + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT + LD rWORD5, rOFF16, rSTR1 + LD rWORD6, rOFF16, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT + LD rWORD7, rOFF24, rSTR1 + LD rWORD8, rOFF24, rSTR2 + addi rSTR1, rSTR1, 24 + addi rSTR2, rSTR2, 24 + cmpld cr6, rWORD5, rWORD6 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + cmpld cr5, rWORD7, rWORD8 + bdz L(du24) /* Adjust CTR as we start with +4. 
*/ +/* This is the primary loop. */ + .align 4 +L(duLoop): + LD rWORD1, rOFF8, rSTR1 + LD rWORD2, rOFF8, rSTR2 + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + srd r0, rWORD2, rSHR + sld rWORD2_SHIFT, rWORD2, rSHL + or rWORD2, r0, rWORD8_SHIFT +L(duLoop1): + LD rWORD3, rOFF16, rSTR1 + LD rWORD4, rOFF16, rSTR2 + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + srd r12, rWORD4, rSHR + sld rWORD4_SHIFT, rWORD4, rSHL + or rWORD4, r12, rWORD2_SHIFT +L(duLoop2): + LD rWORD5, rOFF24, rSTR1 + LD rWORD6, rOFF24, rSTR2 + cmpld cr5, rWORD7, rWORD8 + bne cr7, L(duLcr7) + srd r0, rWORD6, rSHR + sld rWORD6_SHIFT, rWORD6, rSHL + or rWORD6, r0, rWORD4_SHIFT +L(duLoop3): + LD rWORD7, rOFF32, rSTR1 + LD rWORD8, rOFF32, rSTR2 + addi rSTR1, rSTR1, 32 + addi rSTR2, rSTR2, 32 + cmpld cr7, rWORD1, rWORD2 + bne cr1, L(duLcr1) + srd r12, rWORD8, rSHR + sld rWORD8_SHIFT, rWORD8, rSHL + or rWORD8, r12, rWORD6_SHIFT + bdnz L(duLoop) + +L(duL4): + cmpld cr1, rWORD3, rWORD4 + bne cr6, L(duLcr6) + cmpld cr6, rWORD5, rWORD6 + bne cr5, L(duLcr5) + cmpld cr5, rWORD7, rWORD8 +L(du44): + bne cr7, L(duLcr7) +L(du34): + bne cr1, L(duLcr1) +L(du24): + bne cr6, L(duLcr6) +L(du14): + sldi. rN, rN, 3 + bne cr5, L(duLcr5) +/* At this point we have a remainder of 1 to 7 bytes to compare. We use + shift right double to eliminate bits beyond the compare length. + + However it may not be safe to load rWORD2 which may be beyond the + string length. So we compare the bit length of the remainder to + the right shift count (rSHR). If the bit count is less than or equal + we do not need to load rWORD2 (all significant bits are already in + rWORD8_SHIFT). */ + cmpld cr7, rN, rSHR + beq L(duZeroReturn) + li r0, 0 + ble cr7, L(dutrim) + LD rWORD2, rOFF8, rSTR2 + srd r0, rWORD2, rSHR + .align 4 +L(dutrim): + LD rWORD1, rOFF8, rSTR1 + ld rWORD8, -8(r1) + subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). 
*/ + or rWORD2, r0, rWORD8_SHIFT + ld rWORD7, rWORD7SAVE(r1) + ld rSHL, rSHLSAVE(r1) + srd rWORD1, rWORD1, rN + srd rWORD2, rWORD2, rN + ld rSHR, rSHRSAVE(r1) + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + li rRTN, 0 + cmpld cr7, rWORD1, rWORD2 + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) + beq cr7, L(dureturn24) + li rRTN, 1 + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + bgtlr cr7 + li rRTN, -1 + blr + .align 4 +L(duLcr7): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr7, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr1): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr1, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr6): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr6, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + .align 4 +L(duLcr5): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) + li rRTN, 1 + bgt cr5, L(dureturn29) + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) + li rRTN, -1 + b L(dureturn27) + + .align 3 +L(duZeroReturn): + li rRTN, 0 + .align 4 +L(dureturn): + ld rWORD8, rWORD8SAVE(r1) + ld rWORD7, rWORD7SAVE(r1) +L(dureturn29): + ld rSHL, rSHLSAVE(r1) + ld rSHR, rSHRSAVE(r1) +L(dureturn27): + ld rWORD8_SHIFT, rWORD8SHIFTSAVE(r1) + ld rWORD2_SHIFT, rWORD2SHIFTSAVE(r1) + ld rWORD4_SHIFT, rWORD4SHIFTSAVE(r1) +L(dureturn24): + ld rWORD6_SHIFT, rWORD6SHIFTSAVE(r1) + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + blr + +L(duzeroLength): + ld rOFF8, rOFF8SAVE(r1) + ld rOFF16, rOFF16SAVE(r1) + ld rOFF24, rOFF24SAVE(r1) + ld rOFF32, rOFF32SAVE(r1) + li rRTN, 0 + blr + +END (MEMCMP) 
+libc_hidden_builtin_def (memcmp) +weak_alias (memcmp, bcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S new file mode 100644 index 0000000000..bc734c9f4f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/memset.S @@ -0,0 +1,458 @@ +/* Optimized memset implementation for PowerPC64/POWER8. + Copyright (C) 2014-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define MTVSRD_V1_R4 .long 0x7c240166 /* mtvsrd v1,r4 */ + +/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); + Returns 's'. */ + +#ifndef MEMSET +# define MEMSET memset +#endif + + /* No need to use .machine power8 since mtvsrd is already + handled by the define. It avoid breakage on binutils + that does not support this machine specifier. */ + .machine power7 +EALIGN (MEMSET, 5, 0) + CALL_MCOUNT 3 + +L(_memset): + cmpldi cr7,r5,31 + neg r0,r3 + mr r10,r3 + + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 /* Replicate byte to word. */ + ble cr7,L(write_LT_32) + + andi. r11,r10,15 /* Check alignment of DST. */ + insrdi r4,r4,32,0 /* Replicate word to double word. */ + + beq L(big_aligned) + + mtocrf 0x01,r0 + clrldi r0,r0,60 + + /* Get DST aligned to 16 bytes. 
*/ +1: bf 31,2f + stb r4,0(r10) + addi r10,r10,1 + +2: bf 30,4f + sth r4,0(r10) + addi r10,r10,2 + +4: bf 29,8f + stw r4,0(r10) + addi r10,r10,4 + +8: bf 28,16f + std r4,0(r10) + addi r10,r10,8 + +16: subf r5,r0,r5 + + .align 4 +L(big_aligned): + /* For sizes larger than 255 two possible paths: + - if constant is '0', zero full cache lines with dcbz + - otherwise uses vector instructions. */ + cmpldi cr5,r5,255 + dcbtst 0,r10 + cmpldi cr6,r4,0 + crand 27,26,21 + bt 27,L(huge_dcbz) + bge cr5,L(huge_vector) + + + /* Size between 32 and 255 bytes with constant different than 0, use + doubleword store instruction to achieve best throughput. */ + srdi r8,r5,5 + clrldi r11,r5,59 + cmpldi cr6,r11,0 + cmpdi r8,0 + beq L(tail_bytes) + mtctr r8 + + /* Main aligned write loop, writes 32-bytes at a time. */ + .align 4 +L(big_loop): + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + bdz L(tail_bytes) + + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,10,32 + bdnz L(big_loop) + + b L(tail_bytes) + + /* Write remaining 1~31 bytes. */ + .align 4 +L(tail_bytes): + beqlr cr6 + + srdi r7,r11,4 + clrldi r8,r11,60 + mtocrf 0x01,r7 + + .align 4 + bf 31,8f + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + + .align 4 +8: mtocrf 0x1,r8 + bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + .align 4 +4: bf 29,2f + stw 4,0(10) + addi 10,10,4 + + .align 4 +2: bf 30,1f + sth 4,0(10) + addi 10,10,2 + + .align 4 +1: bflr 31 + stb 4,0(10) + blr + + /* Size larger than 255 bytes with constant different than 0, use + vector instruction to achieve best throughput. */ +L(huge_vector): + /* Replicate set byte to quadword in VMX register. */ + MTVSRD_V1_R4 + xxpermdi 32,v0,v1,0 + vspltb v2,v0,15 + + /* Main aligned write loop: 128 bytes at a time. 
*/ + li r6,16 + li r7,32 + li r8,48 + mtocrf 0x02,r5 + srdi r12,r5,7 + cmpdi r12,0 + beq L(aligned_tail) + mtctr r12 + b L(aligned_128loop) + + .align 4 +L(aligned_128loop): + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + bdnz L(aligned_128loop) + + /* Write remaining 1~127 bytes. */ +L(aligned_tail): + mtocrf 0x01,r5 + bf 25,32f + stvx v2,0,r10 + stvx v2,r10,r6 + stvx v2,r10,r7 + stvx v2,r10,r8 + addi r10,r10,64 + +32: bf 26,16f + stvx v2,0,r10 + stvx v2,r10,r6 + addi r10,r10,32 + +16: bf 27,8f + stvx v2,0,r10 + addi r10,r10,16 + +8: bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + /* Copies 4~7 bytes. */ +4: bf 29,L(tail2) + stw r4,0(r10) + bf 30,L(tail5) + sth r4,4(r10) + bflr 31 + stb r4,6(r10) + /* Return original DST pointer. */ + blr + + /* Special case when value is 0 and we have a long length to deal + with. Use dcbz to zero out a full cacheline of 128 bytes at a time. + Before using dcbz though, we need to get the destination 128-byte + aligned. */ + .align 4 +L(huge_dcbz): + andi. r11,r10,127 + neg r0,r10 + beq L(huge_dcbz_aligned) + + clrldi r0,r0,57 + subf r5,r0,r5 + srdi r0,r0,3 + mtocrf 0x01,r0 + + /* Write 1~128 bytes until DST is aligned to 128 bytes. */ +8: bf 28,4f + + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + std r4,32(r10) + std r4,40(r10) + std r4,48(r10) + std r4,56(r10) + addi r10,r10,64 + + .align 4 +4: bf 29,2f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + + .align 4 +2: bf 30,1f + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + + .align 4 +1: bf 31,L(huge_dcbz_aligned) + std r4,0(r10) + addi r10,r10,8 + +L(huge_dcbz_aligned): + /* Setup dcbz unroll offsets and count numbers. 
*/ + srdi r8,r5,9 + clrldi r11,r5,55 + cmpldi cr6,r11,0 + li r9,128 + cmpdi r8,0 + beq L(huge_tail) + li r7,256 + li r6,384 + mtctr r8 + + .align 4 +L(huge_loop): + /* Sets 512 bytes to zero in each iteration, the loop unrolling shows + a throughput boost for large sizes (2048 bytes or higher). */ + dcbz 0,r10 + dcbz r9,r10 + dcbz r7,r10 + dcbz r6,r10 + addi r10,r10,512 + bdnz L(huge_loop) + + beqlr cr6 + +L(huge_tail): + srdi r6,r11,8 + srdi r7,r11,4 + clrldi r8,r11,4 + cmpldi cr6,r8,0 + mtocrf 0x01,r6 + + beq cr6,L(tail) + + /* We have 1~511 bytes remaining. */ + .align 4 +32: bf 31,16f + dcbz 0,r10 + dcbz r9,r10 + addi r10,r10,256 + + .align 4 +16: mtocrf 0x01,r7 + bf 28,8f + dcbz 0,r10 + addi r10,r10,128 + + .align 4 +8: bf 29,4f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + std r4,32(r10) + std r4,40(r10) + std r4,48(r10) + std r4,56(r10) + addi r10,r10,64 + + .align 4 +4: bf 30,2f + std r4,0(r10) + std r4,8(r10) + std r4,16(r10) + std r4,24(r10) + addi r10,r10,32 + + .align 4 +2: bf 31,L(tail) + std r4,0(r10) + std r4,8(r10) + addi r10,r10,16 + .align 4 + + /* Remaining 1~15 bytes. */ +L(tail): + mtocrf 0x01,r8 + + .align +8: bf 28,4f + std r4,0(r10) + addi r10,r10,8 + + .align 4 +4: bf 29,2f + stw r4,0(r10) + addi r10,r10,4 + + .align 4 +2: bf 30,1f + sth r4,0(r10) + addi r10,r10,2 + + .align 4 +1: bflr 31 + stb r4,0(r10) + blr + + /* Handle short copies of 0~31 bytes. Best throughput is achieved + by just unrolling all operations. */ + .align 4 +L(write_LT_32): + cmpldi cr6,5,8 + mtocrf 0x01,r5 + ble cr6,L(write_LE_8) + + /* At least 9 bytes to go. */ + neg r8,r4 + andi. r0,r8,3 + cmpldi cr1,r5,16 + beq L(write_LT_32_aligned) + + /* Force 4-byte alignment for SRC. 
*/ + mtocrf 0x01,r0 + subf r5,r0,r5 + +2: bf 30,1f + sth r4,0(r10) + addi r10,r10,2 + +1: bf 31,L(end_4bytes_alignment) + stb r4,0(r10) + addi r10,r10,1 + + .align 4 +L(end_4bytes_alignment): + cmpldi cr1,r5,16 + mtocrf 0x01,r5 + +L(write_LT_32_aligned): + blt cr1,8f + + stw r4,0(r10) + stw r4,4(r10) + stw r4,8(r10) + stw r4,12(r10) + addi r10,r10,16 + +8: bf 28,L(tail4) + stw r4,0(r10) + stw r4,4(r10) + addi r10,r10,8 + + .align 4 + /* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + stw r4,0(r10) + bf 30,L(tail5) + sth r4,4(r10) + bflr 31 + stb r4,6(r10) + blr + + .align 4 + /* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + sth r4,0(r10) + bflr 31 + stb r4,2(r10) + blr + + .align 4 +L(tail5): + bflr 31 + stb r4,4(r10) + blr + + .align 4 +1: bflr 31 + stb r4,0(r10) + blr + + /* Handles copies of 0~8 bytes. */ + .align 4 +L(write_LE_8): + bne cr6,L(tail4) + + stw r4,0(r10) + stw r4,4(r10) + blr +END_GEN_TB (MEMSET,TB_TOCLESS) +libc_hidden_builtin_def (memset) + +/* Copied from bzero.S to prevent the linker from inserting a stub + between bzero and memset. */ +ENTRY (__bzero) + CALL_MCOUNT 3 + mr r5,r4 + li r4,0 + b L(_memset) +END (__bzero) +#ifndef __bzero +weak_alias (__bzero, bzero) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies new file mode 100644 index 0000000000..1fc7b7cd39 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S new file mode 100644 index 0000000000..955e738cee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpcpy.S @@ -0,0 +1,24 @@ +/* Optimized stpcpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STPCPY +#include <sysdeps/powerpc/powerpc64/power8/strcpy.S> + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S new file mode 100644 index 0000000000..c14d984dd0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/stpncpy.S @@ -0,0 +1,24 @@ +/* Optimized stpncpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STPNCPY +#include <sysdeps/powerpc/powerpc64/power8/strncpy.S> + +weak_alias (__stpncpy, stpncpy) +libc_hidden_def (__stpncpy) +libc_hidden_builtin_def (stpncpy) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S new file mode 100644 index 0000000000..88b17a6eb1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasecmp.S @@ -0,0 +1,457 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ + +#ifndef USE_AS_STRNCASECMP +# define __STRCASECMP __strcasecmp +# define STRCASECMP strcasecmp +#else +# define __STRCASECMP __strncasecmp +# define STRCASECMP strncasecmp +#endif +/* Convert 16 bytes to lowercase and compare */ +#define TOLOWER() \ + vaddubm v8, v4, v1; \ + vaddubm v7, v4, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v4, v7, v4, v8; \ + vaddubm v8, v5, v1; \ + vaddubm v7, v5, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v5, v7, v5, v8; \ + vcmpequb. v7, v5, v4; + +/* + * Get 16 bytes for unaligned case. + * reg1: Vector to hold next 16 bytes. 
+ * reg2: Address to read from. + * reg3: Permute control vector. + * v8: Tmp vector used to mask unwanted bytes. + * v9: Tmp vector,0 when null is found on first 16 bytes + */ +#ifdef __LITTLE_ENDIAN__ +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vspltisb v8, -1; \ + vperm v8, v8, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; +#else +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vspltisb v8, -1; \ + vperm v8, reg1, v8, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, reg1, v9, reg3; +#endif + +/* Check null in v4, v5 and convert to lower. */ +#define CHECKNULLANDCONVERT() \ + vcmpequb. v7, v0, v5; \ + beq cr6, 3f; \ + vcmpequb. v7, v0, v4; \ + beq cr6, 3f; \ + b L(null_found); \ + .align 4; \ +3: \ + TOLOWER() + +#ifdef _ARCH_PWR8 +# define VCLZD_V8_v7 vclzd v8, v7; +# define MFVRD_R3_V1 mfvrd r3, v1; +# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; +# define VPOPCNTD_V8_V8 vpopcntd v8, v8; +# define VADDUQM_V7_V8 vadduqm v9, v7, v8; +#else +# define VCLZD_V8_v7 .long 0x11003fc2 +# define MFVRD_R3_V1 .long 0x7c230067 +# define VSUBUDM_V9_V8 .long 0x112944c0 +# define VPOPCNTD_V8_V8 .long 0x110047c3 +# define VADDUQM_V7_V8 .long 0x11274100 +#endif + + .machine power7 + +ENTRY (__STRCASECMP) +#ifdef USE_AS_STRNCASECMP + CALL_MCOUNT 3 +#else + CALL_MCOUNT 2 +#endif +#define rRTN r3 /* Return value */ +#define rSTR1 r10 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 
+#define rLOC r11 /* Default locale address */ + + cmpd cr7, rRTN, rSTR2 + + /* Get locale address. */ + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) + + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) +#endif + vspltisb v0, 0 + vspltisb v8, -1 + /* Check for null in initial characters. + Check max of 16 char depending on the alignment. + If null is present, proceed byte by byte. */ + lvx v4, 0, rSTR1 +#ifdef __LITTLE_ENDIAN__ + lvsr v10, 0, rSTR1 /* Compute mask. */ + vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ +#else + lvsl v10, 0, rSTR1 + vperm v9, v4, v8, v10 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + lvx v5, 0, rSTR2 + /* Calculate alignment. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 + vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ +#else + lvsl v6, 0, rSTR2 + vperm v9, v5, v8, v6 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + /* Check if locale has non ascii characters. */ + ld rTMP, 0(rLOC) + addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES + lwz rTMP, 0(r6) + cmpdi cr7, rTMP, 1 + beq cr7, L(bytebybyte) + + /* Load vector registers with values used for TOLOWER. */ + /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */ + vspltisb v3, 2 + vspltisb v9, 4 + vsl v3, v3, v9 + vaddubm v1, v3, v3 + vnor v1, v1, v1 + vspltisb v2, 7 + vsububm v2, v3, v2 + + andi. rADDR1, rSTR1, 0xF + beq cr0, L(align) + addi r6, rSTR1, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. */ +#ifdef __LITTLE_ENDIAN__ + vperm v4, v9, v4, v10 +#else + vperm v4, v4, v9, v10 +#endif +L(align): + andi. rADDR2, rSTR2, 0xF + beq cr0, L(align1) + addi r6, rSTR2, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. 
*/ +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v6 +#else + vperm v5, v5, v9, v6 +#endif +L(align1): + CHECKNULLANDCONVERT() + blt cr6, L(match) + b L(different) + .align 4 +L(match): + clrldi r6, rSTR1, 60 + subfic r7, r6, 16 +#ifdef USE_AS_STRNCASECMP + sub r5, r5, r7 +#endif + add rSTR1, rSTR1, r7 + add rSTR2, rSTR2, r7 + andi. rADDR2, rSTR2, 0xF + addi rSTR1, rSTR1, -16 + addi rSTR2, rSTR2, -16 + beq cr0, L(aligned) +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 +#else + lvsl v6, 0, rSTR2 +#endif + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2, check for null, + convert to lowercase and compare. Loop till difference + or null occurs. */ +L(s1_align): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + GET16BYTES(v5, rSTR2, v6) + CHECKNULLANDCONVERT() + blt cr6, L(s1_align) + b L(different) + .align 4 +L(aligned): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + lvx v5, 0, rSTR2 + CHECKNULLANDCONVERT() + blt cr6, L(aligned) + + /* Calculate and return the difference. */ +L(different): + vaddubm v1, v3, v3 + vcmpequb v7, v0, v7 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) + vsro v8, v8, v1 +#endif + b L(skipsum) + .align 4 +L(shift8): + vsumsws v8, v8, v0 +L(skipsum): +#ifdef __LITTLE_ENDIAN__ + /* Shift registers based on leading zero count. */ + vsro v6, v5, v8 + vsro v7, v4, v8 + /* Merge and move to GPR. */ + vmrglb v6, v6, v7 + vslo v1, v6, v1 + MFVRD_R3_V1 + /* Place the characters that are different in first position. 
*/ + sldi rSTR2, rRTN, 56 + srdi rSTR2, rSTR2, 56 + sldi rSTR1, rRTN, 48 + srdi rSTR1, rSTR1, 56 +#else + vslo v6, v5, v8 + vslo v7, v4, v8 + vmrghb v1, v6, v7 + MFVRD_R3_V1 + srdi rSTR2, rRTN, 48 + sldi rSTR2, rSTR2, 56 + srdi rSTR2, rSTR2, 56 + srdi rSTR1, rRTN, 56 +#endif + subf rRTN, rSTR1, rSTR2 + extsw rRTN, rRTN + blr + + .align 4 + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of junk beyond + the end of the strings... */ +L(null_found): + vaddubm v10, v3, v3 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) + vsro v8, v8, v10 +#endif + b L(skipsum1) + .align 4 +L(shift_8): + vsumsws v8, v8, v0 +L(skipsum1): + /* Calculate shift count based on count of zero. */ + vspltisb v10, 7 + vslb v10, v10, v10 + vsldoi v9, v0, v10, 1 + VSUBUDM_V9_V8 + vspltisb v8, 8 + vsldoi v8, v0, v8, 1 + VSUBUDM_V9_V8 + /* Shift and remove junk after null character. */ +#ifdef __LITTLE_ENDIAN__ + vslo v5, v5, v9 + vslo v4, v4, v9 +#else + vsro v5, v5, v9 + vsro v4, v4, v9 +#endif + /* Convert and compare 16 bytes. */ + TOLOWER() + blt cr6, L(retnull) + b L(different) + .align 4 +L(retnull): + li rRTN, 0 + blr + .align 4 +L(bytebybyte): + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + rldicl rTMP, r5, 62, 2 + cmpdi cr7, rTMP, 0 + beq cr7, L(lessthan4) + mtctr rTMP +#endif +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? 
*/ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + bdnz L(loop) +#else + b L(loop) +#endif +#ifdef USE_AS_STRNCASECMP +L(lessthan4): + clrldi r5, r5, 62 + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + mtctr r5 +L(loop1): + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + addi rSTR1, rSTR1, 1 + addi rSTR2, rSTR2, 1 + lbz rCHAR1, 0(rSTR1) + lbz rCHAR2, 0(rSTR2) + bdnz L(loop1) +#endif +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (__STRCASECMP) + +weak_alias (__STRCASECMP, STRCASECMP) +libc_hidden_builtin_def (__STRCASECMP) diff --git 
a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c new file mode 100644 index 0000000000..0e746b7718 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr-ppc64.c @@ -0,0 +1,29 @@ +/* Optimized strcasestr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#define STRCASESTR __strcasestr_ppc +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(__name) + +#undef weak_alias +#define weak_alias(a,b) +extern __typeof (strcasestr) __strcasestr_ppc attribute_hidden; + +#include <string/strcasestr.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S new file mode 100644 index 0000000000..6ac6572f3b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcasestr.S @@ -0,0 +1,538 @@ +/* Optimized strcasestr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* Char * [r3] strcasestr (char *s [r3], char * pat[r4]) */ + +/* The performance gain is obtained by comparing 16 bytes. */ + +/* When the first char of r4 is hit ITERATIONS times in r3 + fallback to default. */ +#define ITERATIONS 64 + +#ifndef STRCASESTR +# define STRCASESTR __strcasestr +#endif + +#ifndef STRLEN +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRLEN __GI_strlen +# else +# define STRLEN strlen +# endif +#endif + +#ifndef STRNLEN +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define STRNLEN __GI_strnlen +# else +# define STRNLEN __strnlen +# endif +#endif + +#ifndef STRCHR +# ifdef SHARED +# define STRCHR __GI_strchr +# else +# define STRCHR strchr +# endif +#endif + +/* Convert 16 bytes of v4 and reg to lowercase and compare. */ +#define TOLOWER(reg) \ + vcmpgtub v6, v4, v1; \ + vcmpgtub v7, v2, v4; \ + vand v8, v7, v6; \ + vand v8, v8, v3; \ + vor v4, v8, v4; \ + vcmpgtub v6, reg, v1; \ + vcmpgtub v7, v2, reg; \ + vand v8, v7, v6; \ + vand v8, v8, v3; \ + vor reg, v8, reg; \ + vcmpequb. 
v6, reg, v4; + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#ifdef _ARCH_PWR8 +#define VCLZD_V8_v7 vclzd v8, v7; +#else +#define VCLZD_V8_v7 .long 0x11003fc2 +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+48) +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +EALIGN (STRCASESTR, 4, 0) + CALL_MCOUNT 2 + mflr r0 /* Load link register LR to r0. */ + std r31, -8(r1) /* Save callers register r31. */ + std r30, -16(r1) /* Save callers register r30. */ + std r29, -24(r1) /* Save callers register r29. */ + std r28, -32(r1) /* Save callers register r28. */ + std r27, -40(r1) /* Save callers register r27. */ + std r0, 16(r1) /* Store the link register. */ + cfi_offset(r31, -8) + cfi_offset(r30, -16) + cfi_offset(r29, -24) + cfi_offset(r28, -32) + cfi_offset(r27, -40) + cfi_offset(lr, 16) + stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */ + cfi_adjust_cfa_offset(FRAMESIZE) + + dcbt 0, r3 + dcbt 0, r4 + cmpdi cr7, r3, 0 /* Input validation. */ + beq cr7, L(retnull) + cmpdi cr7, r4, 0 + beq cr7, L(retnull) + + mr r29, r3 + mr r30, r4 + /* Load first byte from r4 and check if its null. */ + lbz r6, 0(r4) + cmpdi cr7, r6, 0 + beq cr7, L(ret_r3) + + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r9, r10, __libc_tsd_LOCALE@tls + ld r9, 0(r9) + ld r9, LOCALE_CTYPE_TOUPPER(r9) + sldi r10, r6, 2 /* Convert to upper case. */ + lwzx r28, r9, r10 + + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) + sldi r10, r6, 2 /* Convert to lower case. */ + lwzx r27, r11, r10 + + /* Check if the first char is present. */ + mr r4, r27 + bl STRCHR + nop + mr r5, r3 + mr r3, r29 + mr r29, r5 + mr r4, r28 + bl STRCHR + nop + cmpdi cr7, r29, 0 + beq cr7, L(firstpos) + cmpdi cr7, r3, 0 + beq cr7, L(skipcheck) + cmpw cr7, r3, r29 + ble cr7, L(firstpos) + /* Move r3 to the first occurence. 
*/ +L(skipcheck): + mr r3, r29 +L(firstpos): + mr r29, r3 + + sldi r9, r27, 8 + or r28, r9, r28 + /* Reg r27 is used to count the number of iterations. */ + li r27, 0 + /* If first char of search str is not present. */ + cmpdi cr7, r3, 0 + ble cr7, L(end) + + /* Find the length of pattern. */ + mr r3, r30 + bl STRLEN + nop + + cmpdi cr7, r3, 0 /* If search str is null. */ + beq cr7, L(ret_r3) + + mr r31, r3 + mr r4, r3 + mr r3, r29 + bl STRNLEN + nop + + cmpd cr7, r3, r31 /* If len(r3) < len(r4). */ + blt cr7, L(retnull) + + mr r3, r29 + + /* Locales not matching ASCII for single bytes. */ + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r9, r10, __libc_tsd_LOCALE@tls + ld r9, 0(r9) + ld r7, 0(r9) + addi r7, r7, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES + lwz r8, 0(r7) + cmpdi cr7, r8, 1 + beq cr7, L(bytebybyte) + + /* If len(r4) < 16 handle byte by byte. */ + /* For shorter strings we will not use vector registers. */ + cmpdi cr7, r31, 16 + blt cr7, L(bytebybyte) + + /* Comparison values used for TOLOWER. */ + /* Load v1 = 64('A' - 1), v2 = 91('Z' + 1), v3 = 32 in each byte. */ + vspltish v0, 0 + vspltisb v5, 2 + vspltisb v4, 4 + vsl v3, v5, v4 + vaddubm v1, v3, v3 + vspltisb v5, 15 + vaddubm v2, v5, v5 + vaddubm v2, v1, v2 + vspltisb v4, -3 + vaddubm v2, v2, v4 + + /* + 1. Load 16 bytes from r3 and r4 + 2. Check if there is null, If yes, proceed byte by byte path. + 3. Else,Convert both to lowercase and compare. + 4. If they are same proceed to 1. + 5. If they dont match, find if first char of r4 is present in the + loaded 16 byte of r3. + 6. If yes, move position, load next 16 bytes of r3 and proceed to 2. + */ + + mr r8, r3 /* Save r3 for future use. */ + mr r4, r30 /* Restore r4. */ + clrldi r10, r4, 60 + lvx v5, 0, r4 /* Load 16 bytes from r4. */ + cmpdi cr7, r10, 0 + beq cr7, L(begin2) + /* If r4 is unaligned, load another 16 bytes. 
*/ +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r4 +#else + lvsl v7, 0, r4 +#endif + addi r5, r4, 16 + lvx v9, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v7 +#else + vperm v5, v5, v9, v7 +#endif +L(begin2): + lvx v4, 0, r3 + vcmpequb. v7, v0, v4 /* Check for null. */ + beq cr6, L(nullchk6) + b L(trailcheck) + + .align 4 +L(nullchk6): + clrldi r10, r3, 60 + cmpdi cr7, r10, 0 + beq cr7, L(next16) +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r3 +#else + lvsl v7, 0, r3 +#endif + addi r5, r3, 16 + /* If r3 is unaligned, load another 16 bytes. */ + lvx v10, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v10, v4, v7 +#else + vperm v4, v4, v10, v7 +#endif +L(next16): + vcmpequb. v6, v0, v5 /* Check for null. */ + beq cr6, L(nullchk) + b L(trailcheck) + + .align 4 +L(nullchk): + vcmpequb. v6, v0, v4 + beq cr6, L(nullchk1) + b L(retnull) + + .align 4 +L(nullchk1): + /* Convert both v3 and v4 to lower. */ + TOLOWER(v5) + /* If both are same, branch to match. */ + blt cr6, L(match) + /* Find if the first char is present in next 15 bytes. */ +#ifdef __LITTLE_ENDIAN__ + vspltb v6, v5, 15 + vsldoi v7, v0, v4, 15 +#else + vspltb v6, v5, 0 + vspltisb v7, 8 + vslo v7, v4, v7 +#endif + vcmpequb v7, v6, v7 + vcmpequb. v6, v0, v7 + /* Shift r3 by 16 bytes and proceed. */ + blt cr6, L(shift16) + VCLZD_V8_v7 +#ifdef __LITTLE_ENDIAN__ + vspltb v6, v8, 15 +#else + vspltb v6, v8, 7 +#endif + vcmpequb. v6, v6, v1 + /* Shift r3 by 8 bytes and proceed. */ + blt cr6, L(shift8) + b L(begin) + + .align 4 +L(match): + /* There is a match of 16 bytes, check next bytes. */ + cmpdi cr7, r31, 16 + mr r29, r3 + beq cr7, L(ret_r3) + +L(secondmatch): + addi r3, r3, 16 + addi r4, r4, 16 + /* Load next 16 bytes of r3 and r4 and compare. */ + clrldi r10, r4, 60 + cmpdi cr7, r10, 0 + beq cr7, L(nextload) + /* Handle unaligned case. */ + vor v6, v9, v9 + vcmpequb. 
v7, v0, v6 + beq cr6, L(nullchk2) + b L(trailcheck) + + .align 4 +L(nullchk2): +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r4 +#else + lvsl v7, 0, r4 +#endif + addi r5, r4, 16 + /* If r4 is unaligned, load another 16 bytes. */ + lvx v9, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v11, v9, v6, v7 +#else + vperm v11, v6, v9, v7 +#endif + b L(compare) + + .align 4 +L(nextload): + lvx v11, 0, r4 +L(compare): + vcmpequb. v7, v0, v11 + beq cr6, L(nullchk3) + b L(trailcheck) + + .align 4 +L(nullchk3): + clrldi r10, r3, 60 + cmpdi cr7, r10, 0 + beq cr7, L(nextload1) + /* Handle unaligned case. */ + vor v4, v10, v10 + vcmpequb. v7, v0, v4 + beq cr6, L(nullchk4) + b L(retnull) + + .align 4 +L(nullchk4): +#ifdef __LITTLE_ENDIAN__ + lvsr v7, 0, r3 +#else + lvsl v7, 0, r3 +#endif + addi r5, r3, 16 + /* If r3 is unaligned, load another 16 bytes. */ + lvx v10, 0, r5 +#ifdef __LITTLE_ENDIAN__ + vperm v4, v10, v4, v7 +#else + vperm v4, v4, v10, v7 +#endif + b L(compare1) + + .align 4 +L(nextload1): + lvx v4, 0, r3 +L(compare1): + vcmpequb. v7, v0, v4 + beq cr6, L(nullchk5) + b L(retnull) + + .align 4 +L(nullchk5): + /* Convert both v3 and v4 to lower. */ + TOLOWER(v11) + /* If both are same, branch to secondmatch. */ + blt cr6, L(secondmatch) + /* Continue the search. */ + b L(begin) + + .align 4 +L(trailcheck): + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) +L(loop2): + lbz r5, 0(r3) /* Load byte from r3. */ + lbz r6, 0(r4) /* Load next byte from r4. */ + cmpdi cr7, r6, 0 /* Is it null? */ + beq cr7, L(updater3) + cmpdi cr7, r5, 0 /* Is it null? */ + beq cr7, L(retnull) /* If yes, return. */ + addi r3, r3, 1 + addi r4, r4, 1 /* Increment r4. */ + sldi r10, r5, 2 /* Convert to lower case. */ + lwzx r10, r11, r10 + sldi r7, r6, 2 /* Convert to lower case. */ + lwzx r7, r11, r7 + cmpw cr7, r7, r10 /* Compare with byte from r4. 
*/ + bne cr7, L(begin) + b L(loop2) + + .align 4 +L(shift8): + addi r8, r8, 7 + b L(begin) + .align 4 +L(shift16): + addi r8, r8, 15 + .align 4 +L(begin): + addi r8, r8, 1 + mr r3, r8 + /* When our iterations exceed ITERATIONS,fall back to default. */ + addi r27, r27, 1 + cmpdi cr7, r27, ITERATIONS + beq cr7, L(default) + mr r4, r30 /* Restore r4. */ + b L(begin2) + + /* Handling byte by byte. */ + .align 4 +L(loop1): + mr r3, r8 + addi r27, r27, 1 + cmpdi cr7, r27, ITERATIONS + beq cr7, L(default) + mr r29, r8 + srdi r4, r28, 8 + /* Check if the first char is present. */ + bl STRCHR + nop + mr r5, r3 + mr r3, r29 + mr r29, r5 + sldi r4, r28, 56 + srdi r4, r4, 56 + bl STRCHR + nop + cmpdi cr7, r29, 0 + beq cr7, L(nextpos) + cmpdi cr7, r3, 0 + beq cr7, L(skipcheck1) + cmpw cr7, r3, r29 + ble cr7, L(nextpos) + /* Move r3 to first occurence. */ +L(skipcheck1): + mr r3, r29 +L(nextpos): + mr r29, r3 + cmpdi cr7, r3, 0 + ble cr7, L(retnull) +L(bytebybyte): + ld r10, __libc_tsd_LOCALE@got@tprel(r2) + add r11, r10, __libc_tsd_LOCALE@tls + ld r11, 0(r11) + ld r11, LOCALE_CTYPE_TOLOWER(r11) + mr r4, r30 /* Restore r4. */ + mr r8, r3 /* Save r3. */ + addi r8, r8, 1 + +L(loop): + addi r3, r3, 1 + lbz r5, 0(r3) /* Load byte from r3. */ + addi r4, r4, 1 /* Increment r4. */ + lbz r6, 0(r4) /* Load next byte from r4. */ + cmpdi cr7, r6, 0 /* Is it null? */ + beq cr7, L(updater3) + cmpdi cr7, r5, 0 /* Is it null? */ + beq cr7, L(retnull) /* If yes, return. */ + sldi r10, r5, 2 /* Convert to lower case. */ + lwzx r10, r11, r10 + sldi r7, r6, 2 /* Convert to lower case. */ + lwzx r7, r11, r7 + cmpw cr7, r7, r10 /* Compare with byte from r4. */ + bne cr7, L(loop1) + b L(loop) + + /* Handling return values. */ + .align 4 +L(updater3): + subf r3, r31, r3 /* Reduce r31 (len of r4) from r3. */ + b L(end) + + .align 4 +L(ret_r3): + mr r3, r29 /* Return point of match. */ + b L(end) + + .align 4 +L(retnull): + li r3, 0 /* Substring was not found. 
*/ + b L(end) + + .align 4 +L(default): + mr r4, r30 + bl __strcasestr_ppc + nop + + .align 4 +L(end): + addi r1, r1, FRAMESIZE /* Restore stack pointer. */ + cfi_adjust_cfa_offset(-FRAMESIZE) + ld r0, 16(r1) /* Restore the saved link register. */ + ld r27, -40(r1) + ld r28, -32(r1) + ld r29, -24(r1) /* Restore callers save register r29. */ + ld r30, -16(r1) /* Restore callers save register r30. */ + ld r31, -8(r1) /* Restore callers save register r31. */ + cfi_restore(lr) + cfi_restore(r27) + cfi_restore(r28) + cfi_restore(r29) + cfi_restore(r30) + cfi_restore(r31) + mtlr r0 /* Branch to link register. */ + blr +END (STRCASESTR) + +weak_alias (__strcasestr, strcasestr) +libc_hidden_def (__strcasestr) +libc_hidden_builtin_def (strcasestr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S new file mode 100644 index 0000000000..e0c185c162 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchr.S @@ -0,0 +1,377 @@ +/* Optimized strchr implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#ifdef USE_AS_STRCHRNUL +# ifndef STRCHRNUL +# define FUNC_NAME __strchrnul +# else +# define FUNC_NAME STRCHRNUL +# endif +#else +# ifndef STRCHR +# define FUNC_NAME strchr +# else +# define FUNC_NAME STRCHR +# endif +#endif /* !USE_AS_STRCHRNUL */ + +/* int [r3] strchr (char *s [r3], int c [r4]) */ +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +ENTRY (FUNC_NAME) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. */ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + + mtcrf 0x01,r8 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. 
Otherwise, go through the alignment code. */ + + bt 28,L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r9,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r9,r4 + cmpb r7,r9,r0 + or r5,r10,r11 + or r9,r6,r7 + or r12,r5,r9 + cmpdi cr7,r12,0 + beq cr7,L(vector) + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 +#ifdef USE_AS_STRCHRNUL + mr r5, r9 +#endif + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done): +#ifdef USE_AS_STRCHRNUL + mr r10, r5 +#endif +#ifdef __LITTLE_ENDIAN__ + addi r3,r10,-1 + andc r3,r3,r10 + popcntd r0,r3 +# ifndef USE_AS_STRCHRNUL + addi r4,r11,-1 + andc r4,r4,r11 + cmpld cr7,r3,r4 + bgt cr7,L(no_match) +# endif +#else + cntlzd r0,r10 /* Count leading zeros before c matches. */ +# ifndef USE_AS_STRCHRNUL + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +# endif +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + blr + + /* Check the first 32B in GPR's and move to vectorized loop. 
*/ + .p2align 5 +L(vector): + addi r3, r8, 8 + andi. r10, r3, 31 + bne cr0, L(loop) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + MTVRD(v1,r4) + li r5, 16 + vspltb v1, v1, 7 + /* Compare 32 bytes in each loop. */ +L(continue): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vcmpequb v6, v1, v4 + vcmpequb v7, v1, v5 + vor v8, v2, v3 + vor v9, v6, v7 + vor v11, v8, v9 + vcmpequb. v11, v0, v11 + addi r3, r3, 32 + blt cr6, L(continue) + /* One (or both) of the quadwords contains a c/null byte. */ + addi r3, r3, -32 +#ifndef USE_AS_STRCHRNUL + vcmpequb. v11, v0, v9 + blt cr6, L(no_match) +#endif + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + VBPERMQ(v6, v6, v10) + VBPERMQ(v7, v7, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 + vsldoi v7, v7, v7, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 + vsldoi v6, v6, v6, 6 + vsldoi v7, v7, v7, 4 +#endif + + /* Merge the results and move to a GPR. */ + vor v1, v3, v2 + vor v2, v6, v7 + vor v4, v1, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + /* Return NULL if null found before c. */ +#ifndef USE_AS_STRCHRNUL + lbz r4, 0(r3) + cmpdi cr7, r4, 0 + beq cr7, L(no_match) +#endif + blr + +#ifndef USE_AS_STRCHRNUL + .align 4 +L(no_match): + li r3,0 + blr +#endif + +/* We are here because strchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. 
*/ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + mtcrf 0x01,r8 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bt 28,L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(vector1) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. */ +#endif + srdi r0,r0,3 /* Convert leading zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr + .p2align 5 +L(vector1): + addi r3, r8, 8 + andi. 
r10, r3, 31 + bne cr0, L(loop_null) + vspltisb v8, -1 + vspltisb v0, 0 + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + li r5, 16 +L(continue1): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vor v8, v2, v3 + vcmpequb. v11, v0, v8 + addi r3, r3, 32 + blt cr6, L(continue1) + addi r3, r3, -32 +L(end1): + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 +#endif + + /* Merge the results and move to a GPR. */ + vor v4, v3, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + blr +END (FUNC_NAME) + +#ifndef USE_AS_STRCHRNUL +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S new file mode 100644 index 0000000000..3bf4b275dd --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strchrnul.S @@ -0,0 +1,23 @@ +/* Optimized strchrnul implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRCHRNUL 1 +#include <sysdeps/powerpc/powerpc64/power8/strchr.S> + +weak_alias (__strchrnul,strchrnul) +libc_hidden_builtin_def (__strchrnul) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S new file mode 100644 index 0000000000..770484f1e1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcmp.S @@ -0,0 +1,247 @@ +/* Optimized strcmp implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* Implements the function + + size_t [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. 
*/ + +EALIGN (STRCMP, 4, 0) + li r0,0 + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. */ + + rldicl r7,r3,0,52 + rldicl r9,r4,0,52 + cmpldi cr7,r7,4096-16 + bgt cr7,L(pagecross_check) + cmpldi cr5,r9,4096-16 + bgt cr5,L(pagecross_check) + + /* For short string up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r8,0(r3) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ld r8,8(r3) + ld r10,8(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + addi r7,r3,16 + addi r4,r4,16 + +L(align_8b): + /* Now it has checked for first 16 bytes, align source1 to doubleword + and adjust source2 address. */ + rldicl r9,r7,0,61 /* source1 alignment to doubleword */ + subf r4,r9,r4 /* Adjust source2 address based on source1 + alignment. */ + rldicr r7,r7,0,60 /* Align source1 to doubleword. */ + + /* At this point, source1 alignment is 0 and source2 alignment is + between 0 and 7. Check is source2 alignment is 0, meaning both + sources have the same alignment. */ + andi. r9,r4,0x7 + bne cr0,L(loop_diff_align) + + /* If both source1 and source2 are doubleword aligned, there is no + need for page boundary cross checks. */ + + ld r8,0(r7) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + .align 4 +L(loop_equal_align): + ld r8,8(r7) + ld r10,8(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ld r8,16(r7) + ld r10,16(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + + ldu r8,24(r7) + ldu r10,24(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. 
r9,r12,r11 + bne cr0,L(different_nocmpb) + + b L(loop_equal_align) + + /* A zero byte was found in r8 (s1 dword), r9 contains the cmpb + result and r10 the dword from s2. To code isolate the byte + up to end (including the '\0'), masking with 0xFF the remaining + ones: + + #if __LITTLE_ENDIAN__ + (__builtin_ffsl (x) - 1) = counting trailing zero bits + r9 = (__builtin_ffsl (r9) - 1) + 8; + r9 = -1UL << r9 + #else + r9 = __builtin_clzl (r9) + 8; + r9 = -1UL >> r9 + #endif + r8 = r8 | r9 + r10 = r10 | r9 */ + +#ifdef __LITTLE_ENDIAN__ + nor r9,r9,r9 +L(different_nocmpb): + neg r3,r9 + and r9,r9,r3 + cntlzd r9,r9 + subfic r9,r9,63 +#else + not r9,r9 +L(different_nocmpb): + cntlzd r9,r9 + subfic r9,r9,56 +#endif + srd r3,r8,r9 + srd r10,r10,r9 + rldicl r10,r10,0,56 + rldicl r3,r3,0,56 + subf r3,r10,r3 + extsw r3,r3 + blr + + .align 4 +L(pagecross_check): + subfic r9,r9,4096 + subfic r7,r7,4096 + cmpld cr7,r7,r9 + bge cr7,L(pagecross) + mr r7,r9 + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ +L(pagecross): + add r7,r3,r7 + subf r9,r3,r7 + mtctr r9 + + .align 4 +L(pagecross_loop): + /* Loads a byte from s1 and s2, compare if *s1 is equal to *s2 + and if *s1 is '\0'. */ + lbz r9,0(r3) + lbz r10,0(r4) + addi r3,r3,1 + addi r4,r4,1 + cmplw cr7,r9,r10 + cmpdi cr5,r9,r0 + bne cr7,L(pagecross_ne) + beq cr5,L(pagecross_nullfound) + bdnz L(pagecross_loop) + b L(align_8b) + + .align 4 + /* The unaligned read of source2 will cross a 4K page boundary, + and the different byte or NULL maybe be in the remaining page + bytes. Since it can not use the unaligned load, the algorithm + reads and compares 8 bytes to keep source1 doubleword aligned. 
*/ +L(check_source2_byte): + li r9,8 + mtctr r9 + + .align 4 +L(check_source2_byte_loop): + lbz r9,0(r7) + lbz r10,0(r4) + addi r7,r7,1 + addi r4,r4,1 + cmplw cr7,r9,r10 + cmpdi cr5,r9,0 + bne cr7,L(pagecross_ne) + beq cr5,L(pagecross_nullfound) + bdnz L(check_source2_byte_loop) + + /* If source2 is unaligned to doubleword, the code needs to check + on each iteration if the unaligned doubleword access will cross + a 4k page boundary. */ + .align 5 +L(loop_unaligned): + ld r8,0(r7) + ld r10,0(r4) + cmpb r12,r8,r0 + cmpb r11,r8,r10 + orc. r9,r12,r11 + bne cr0,L(different_nocmpb) + addi r7,r7,8 + addi r4,r4,8 + +L(loop_diff_align): + /* Check if [src2]+8 crosses a 4k page boundary: + + srcin2 % PAGE_SIZE > (PAGE_SIZE - 8) + + with PAGE_SIZE being 4096. */ + rldicl r9,r4,0,52 + cmpldi cr7,r9,4088 + ble cr7,L(loop_unaligned) + b L(check_source2_byte) + + .align 4 +L(pagecross_ne): + extsw r3,r9 + mr r9,r10 +L(pagecross_retdiff): + subf r9,r9,r3 + extsw r3,r9 + blr + + .align 4 +L(pagecross_nullfound): + li r3,0 + b L(pagecross_retdiff) +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S new file mode 100644 index 0000000000..7f2cee4b1b --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcpy.S @@ -0,0 +1,270 @@ +/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifdef USE_AS_STPCPY +# ifndef STPCPY +# define FUNC_NAME __stpcpy +# else +# define FUNC_NAME STPCPY +# endif +#else +# ifndef STRCPY +# define FUNC_NAME strcpy +# else +# define FUNC_NAME STRCPY +# endif +#endif /* !USE_AS_STPCPY */ + +/* Implements the function + + char * [r3] strcpy (char *dest [r3], const char *src [r4]) + + or + + char * [r3] stpcpy (char *dest [r3], const char *src [r4]) + + if USE_AS_STPCPY is defined. + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + /* Check if the [src]+15 will cross a 4K page by checking if the bit + indicating the page size changes. Basically: + + uint64_t srcin = (uint64_t)src; + uint64_t ob = srcin & 4096UL; + uint64_t nb = (srcin+15UL) & 4096UL; + if (ob ^ nb) + goto pagecross; */ + + addi r9,r4,15 + xor r9,r9,r4 + rlwinm. r9,r9,0,19,19 + bne L(pagecross) + + /* For short string (less than 16 bytes), just calculate its size as + strlen and issues a memcpy if null is found. */ + mr r7,r4 + ld r12,0(r7) /* Load doubleword from memory. */ + cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + ldu r8,8(r7) + cmpb r10,r8,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + b L(loop_before) + + .align 4 +L(pagecross): + clrrdi r7,r4,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r4,3,26,28 /* Calculate padding. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r7) /* Load doubleword from memory. 
*/ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + ldu r6,8(r7) + cmpb r10,r6,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + ld r12,0(r7) + cmpb r10,r12,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + ldu r6,8(r7) + cmpb r10,r6,r0 + cmpdi cr7,r10,0 + bne cr7,L(done) + + /* We checked for 24 - x bytes, with x being the source alignment + (0 <= x <= 16), and no zero has been found. Start the loop + copy with doubleword aligned address. */ + mr r7,r4 + ld r12, 0(r7) + ldu r8, 8(r7) + +L(loop_before): + /* Save the two doublewords readed from source and align the source + to 16 bytes for the loop. */ + mr r11,r3 + std r12,0(r11) + std r8,8(r11) + addi r11,r11,16 + rldicl r9,r4,0,60 + subf r7,r9,r7 + subf r11,r9,r11 + b L(loop_start) + + .align 5 +L(loop): + std r12, 0(r11) + std r6, 8(r11) + addi r11,r11,16 +L(loop_start): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + + ld r12, 8(r7) + ldu r6, 16(r7) + cmpb r10,r12,r0 + cmpb r9,r6,r0 + or r8,r9,r10 /* Merge everything in one doubleword. */ + cmpdi cr7,r8,0 + beq cr7,L(loop) + + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + addi r4,r7,-8 + cmpdi cr6,r10,0 + addi r7,r7,-8 + bne cr6,L(done2) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. 
*/ + + mr r10,r9 + addi r7,r7,8 + b L(done2) + + /* r10 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the length. */ +L(done): + mr r11,r3 +L(done2): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10, -1 /* Form a mask from trailing zeros. */ + andc r9, r9, r10 + popcntd r6, r9 /* Count the bits in the mask. */ +#else + cntlzd r6,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r4,r7 + srdi r6,r6,3 /* Convert leading/trailing zeros to bytes. */ + add r8,r5,r6 /* Compute final length. */ +#ifdef USE_AS_STPCPY + /* stpcpy returns the dest address plus the size not counting the + final '\0'. */ + add r3,r11,r8 +#endif + addi r8,r8,1 /* Final '\0'. */ + + cmpldi cr6,r8,8 + mtocrf 0x01,r8 + ble cr6,L(copy_LE_8) + + cmpldi cr1,r8,16 + blt cr1,8f + + /* Handle copies of 0~31 bytes. */ + .align 4 +L(copy_LT_32): + /* At least 6 bytes to go. */ + blt cr1,8f + + /* Copy 16 bytes. */ + ld r6,0(r4) + ld r8,8(r4) + addi r4,r4,16 + std r6,0(r11) + std r8,8(r11) + addi r11,r11,16 +8: /* Copy 8 bytes. */ + bf 28,L(tail4) + ld r6,0(r4) + addi r4,r4,8 + std r6,0(r11) + addi r11,r11,8 + + .align 4 +/* Copies 4~7 bytes. */ +L(tail4): + bf 29,L(tail2) + lwz r6,0(r4) + stw r6,0(r11) + bf 30,L(tail5) + lhz r7,4(r4) + sth r7,4(r11) + bflr 31 + lbz r8,6(r4) + stb r8,6(r11) + blr + + .align 4 +/* Copies 2~3 bytes. */ +L(tail2): + bf 30,1f + lhz r6,0(r4) + sth r6,0(r11) + bflr 31 + lbz r7,2(r4) + stb r7,2(r11) + blr + + .align 4 +L(tail5): + bf 31,1f + lbz r6,4(r4) + stb r6,4(r11) + blr + + .align 4 +1: + bflr 31 + lbz r6,0(r4) + stb r6,0(r11) + blr + +/* Handles copies of 0~8 bytes. 
*/ + .align 4 +L(copy_LE_8): + bne cr6,L(tail4) + ld r6,0(r4) + std r6,0(r11) + blr +END (FUNC_NAME) + +#ifndef USE_AS_STPCPY +libc_hidden_builtin_def (strcpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S new file mode 100644 index 0000000000..c9a7a2e3c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strcspn.S @@ -0,0 +1,20 @@ +/* Optimized strcspn implementation for PowerPC64/POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRCSPN 1 +#include <sysdeps/powerpc/powerpc64/power8/strspn.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S new file mode 100644 index 0000000000..8f4a1fc1dc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strlen.S @@ -0,0 +1,301 @@ +/* Optimized strlen implementation for PowerPC64/POWER8 using a vectorized + loop. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. */ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + +/* TODO: change this to .machine power8 when the minimum required binutils + allows it. */ + .machine power7 +EALIGN (STRLEN, 4, 0) + CALL_MCOUNT 1 + dcbt 0,r3 + clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + li r5,-1 /* MASK = 0xffffffffffffffff. */ + ld r12,0(r4) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r5,r5,r6 +#else + srd r5,r5,r6 /* MASK = MASK >> padding. */ +#endif + orc r9,r12,r5 /* Mask bits that are not part of the string. */ + cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */ + bne cr7,L(done) + + /* For shorter strings (< 64 bytes), we will not use vector registers, + as the overhead isn't worth it. So, let's use GPRs instead. 
This + will be done the same way as we do in the POWER7 implementation. + Let's see if we are aligned to a quadword boundary. If so, we can + jump to the first (non-vectorized) loop. Otherwise, we have to + handle the next DWORD first. */ + mtcrf 0x01,r4 + mr r9,r4 + addi r9,r9,8 + bt 28,L(align64) + + /* Handle the next 8 bytes so we are aligned to a quadword + boundary. */ + ldu r5,8(r4) + cmpb r10,r5,r0 + cmpdi cr7,r10,0 + addi r9,r9,8 + bne cr7,L(done) + +L(align64): + /* Proceed to the old (POWER7) implementation, checking two doublewords + per iteraction. For the first 56 bytes, we will just check for null + characters. After that, we will also check if we are 64-byte aligned + so we can jump to the vectorized implementation. We will unroll + these loops to avoid excessive branching. */ + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + /* Are we 64-byte aligned? If so, jump to the vectorized loop. + Note: aligning to 64-byte will necessarily slow down performance for + strings around 64 bytes in length due to the extra comparisons + required to check alignment for the vectorized loop. This is a + necessary tradeoff we are willing to take in order to speed up the + calculation for larger strings. */ + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. 
r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + bne cr7,L(dword_zero) + + andi. r10,r9,63 + beq cr0,L(preloop) + ld r6,8(r4) + ldu r5,16(r4) + cmpb r10,r6,r0 + cmpb r11,r5,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + addi r9,r9,16 + + /* At this point, we are necessarily 64-byte aligned. If no zeroes were + found, jump to the vectorized loop. */ + beq cr7,L(preloop) + +L(dword_zero): + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r10,0 + addi r4,r4,-8 + bne cr6,L(done) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + length. */ + + mr r10,r11 + addi r4,r4,8 + + /* If the null byte was found in the non-vectorized code, compute the + final length. r10 has the output of the cmpb instruction, that is, + it contains 0xff in the same position as the null byte in the + original doubleword from the string. Use that to calculate the + length. */ +L(done): +#ifdef __LITTLE_ENDIAN__ + addi r9, r10,-1 /* Form a mask from trailing zeros. */ + andc r9, r9,r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r5,r0 /* Compute final length. */ + blr + + /* Vectorized implementation starts here. */ + .p2align 4 +L(preloop): + /* Set up for the loop. */ + mr r4,r9 + li r7, 16 /* Load required offsets. */ + li r8, 32 + li r9, 48 + li r12, 8 + vxor v0,v0,v0 /* VR with null chars to use with + vcmpequb. */ + + /* Main loop to look for the end of the string. We will read in + 64-byte chunks. Align it to 32 bytes and unroll it 3 times to + leverage the icache performance. 
*/ + .p2align 5 +L(loop): + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + bne cr6,L(vmx_zero) + + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + bne cr6,L(vmx_zero) + + lvx v1,r4,r0 /* Load 4 quadwords. */ + lvx v2,r4,r7 + lvx v3,r4,r8 + lvx v4,r4,r9 + vminub v5,v1,v2 /* Compare and merge into one VR for speed. */ + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for NULLs. */ + addi r4,r4,64 /* Adjust address for the next iteration. */ + beq cr6,L(loop) + +L(vmx_zero): + /* OK, we found a null byte. Let's look for it in the current 64-byte + block and mark it in its corresponding VR. */ + vcmpequb v1,v1,v0 + vcmpequb v2,v2,v0 + vcmpequb v3,v3,v0 + vcmpequb v4,v4,v0 + + /* We will now 'compress' the result into a single doubleword, so it + can be moved to a GPR for the final calculation. First, we + generate an appropriate mask for vbpermq, so we can permute bits into + the first halfword. */ + vspltisb v10,3 + lvsl v11,r0,r0 + vslb v10,v11,v10 + + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v1,v1,v10) + VBPERMQ(v2,v2,v10) + VBPERMQ(v3,v3,v10) + VBPERMQ(v4,v4,v10) + + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v2,v2,v2,2 + vsldoi v3,v3,v3,4 + vsldoi v4,v4,v4,6 +#else + vsldoi v1,v1,v1,6 + vsldoi v2,v2,v2,4 + vsldoi v3,v3,v3,2 +#endif + + /* Merge the results and move to a GPR. 
*/ + vor v1,v2,v1 + vor v2,v3,v4 + vor v4,v1,v2 + MFVRD(r10,v4) + + /* Adjust address to the begninning of the current 64-byte block. */ + addi r4,r4,-64 + +#ifdef __LITTLE_ENDIAN__ + addi r9, r10,-1 /* Form a mask from trailing zeros. */ + andc r9, r9,r10 + popcntd r0, r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r4 + add r3,r5,r0 /* Compute final length. */ + blr + +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S new file mode 100644 index 0000000000..32e09e4d94 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncase.S @@ -0,0 +1,20 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_STRNCASECMP 1 +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S new file mode 100644 index 0000000000..3d8df90538 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncmp.S @@ -0,0 +1,327 @@ +/* Optimized strncmp implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* Implements the function + + int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (STRNCMP, 4, 0) + /* Check if size is 0. */ + mr. r10,r5 + beq cr0,L(ret0) + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. 
*/ + rldicl r8,r3,0,52 + cmpldi cr7,r8,4096-16 + bgt cr7,L(pagecross) + rldicl r9,r4,0,52 + cmpldi cr7,r9,4096-16 + bgt cr7,L(pagecross) + + /* For short string up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r7,0(r3) + ld r9,0(r4) + li r8,0 + cmpb r8,r7,r8 + cmpb r6,r7,r9 + orc. r8,r8,r6 + bne cr0,L(different1) + + /* If the string compared are equal, but size is less or equal + to 8, return 0. */ + cmpldi cr7,r10,8 + li r9,0 + ble cr7,L(ret1) + addi r5,r10,-8 + + ld r7,8(r3) + ld r9,8(r4) + cmpb r8,r7,r8 + cmpb r6,r7,r9 + orc. r8,r8,r6 + bne cr0,L(different0) + + cmpldi cr7,r5,8 + mr r9,r8 + ble cr7,L(ret1) + + /* Update pointers and size. */ + addi r10,r10,-16 + addi r3,r3,16 + addi r4,r4,16 + + /* Now it has checked for first 16 bytes, align source1 to doubleword + and adjust source2 address. */ +L(align_8b): + rldicl r5,r3,0,61 + rldicr r3,r3,0,60 + subf r4,r5,r4 + add r10,r10,r5 + + /* At this point, source1 alignment is 0 and source2 alignment is + between 0 and 7. Check is source2 alignment is 0, meaning both + sources have the same alignment. */ + andi. r8,r4,0x7 + beq cr0,L(loop_eq_align_0) + + li r5,0 + b L(loop_ne_align_1) + + /* If source2 is unaligned to doubleword, the code needs to check + on each interation if the unaligned doubleword access will cross + a 4k page boundary. */ + .align 4 +L(loop_ne_align_0): + ld r7,0(r3) + ld r9,0(r4) + cmpb r8,r7,r5 + cmpb r6,r7,r9 + orc. 
r8,r8,r6 + bne cr0,L(different1) + + cmpldi cr7,r10,8 + ble cr7,L(ret0) + addi r10,r10,-8 + addi r3,r3,8 + addi r4,r4,8 +L(loop_ne_align_1): + rldicl r9,r4,0,52 + cmpldi r7,r9,4088 + ble cr7,L(loop_ne_align_0) + cmpdi cr7,r10,0 + beq cr7,L(ret0) + + lbz r9,0(r3) + lbz r8,0(r4) + cmplw cr7,r9,r8 + bne cr7,L(byte_ne_4) + cmpdi cr7,r9,0 + beq cr7,L(size_reached_0) + + li r9,r7 + addi r8,r3,1 + mtctr r9 + addi r4,r4,1 + addi r10,r10,-1 + addi r3,r3,8 + + /* The unaligned read of source2 will cross a 4K page boundary, + and the different byte or NULL maybe be in the remaining page + bytes. Since it can not use the unaligned load the algorithm + reads and compares 8 bytes to keep source1 doubleword aligned. */ + .align 4 +L(loop_ne_align_byte): + cmpdi cr7,r10,0 + addi r10,r10,-1 + beq cr7,L(ret0) + lbz r9,0(r8) + lbz r7,0(r4) + addi r8,r8,1 + addi r4,r4,1 + cmplw cr7,r9,r7 + cmpdi cr5,r9,0 + bne cr7,L(size_reached_2) + beq cr5,L(size_reached_0) + bdnz L(loop_ne_align_byte) + + cmpdi cr7,r10,0 + bne+ cr7,L(loop_ne_align_0) + + .align 4 +L(ret0): + li r9,0 +L(ret1): + mr r3,r9 + blr + + /* The code now check if r8 and r10 are different by issuing a + cmpb and shift the result based on its output: + + #ifdef __LITTLE_ENDIAN__ + leadzero = (__builtin_ffsl (z1) - 1); + leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero; + r1 = (r1 >> leadzero) & 0xFFUL; + r2 = (r2 >> leadzero) & 0xFFUL; + #else + leadzero = __builtin_clzl (z1); + leadzero = leadzero > (n-1)*8 ? 
(n-1)*8 : leadzero; + r1 = (r1 >> (56 - leadzero)) & 0xFFUL; + r2 = (r2 >> (56 - leadzero)) & 0xFFUL; + #endif + return r1 - r2; */ + + .align 4 +L(different0): + mr r10,r5 +#ifdef __LITTLE_ENDIAN__ +L(different1): + neg r11,r8 + sldi r10,r10,3 + and r8,r11,r8 + addi r10,r10,-8 + cntlzd r8,r8 + subfic r8,r8,63 + extsw r8,r8 + cmpld cr7,r8,r10 + ble cr7,L(different2) + mr r8,r10 +L(different2): + extsw r8,r8 +#else +L(different1): + addi r10,r10,-1 + cntlzd r8,r8 + sldi r10,r10,3 + cmpld cr7,r8,r10 + blt cr7,L(different2) + mr r8,r10 +L(different2): + subfic r8,r8,56 +#endif + srd r7,r7,r8 + srd r9,r9,r8 + rldicl r3,r7,0,56 + rldicl r9,r9,0,56 + subf r9,r9,3 + extsw r9,r9 + mr r3,r9 + blr + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ + .align 4 +L(pagecross): + lbz r7,0(r3) + lbz r9,0(r4) + subfic r8,r8,4095 + cmplw cr7,r9,r7 + bne cr7,L(byte_ne_3) + cmpdi cr7,r9,0 + beq cr7,L(byte_ne_0) + addi r10,r10,-1 + subf r7,r8,r10 + subf r9,r7,r10 + addi r9,r9,1 + mtctr r9 + b L(pagecross_loop1) + + .align 4 +L(pagecross_loop0): + beq cr7,L(ret0) + lbz r9,0(r3) + lbz r8,0(r4) + addi r10,r10,-1 + cmplw cr7,r9,r8 + cmpdi cr5,r9,0 + bne r7,L(byte_ne_2) + beq r5,L(byte_ne_0) +L(pagecross_loop1): + cmpdi cr7,r10,0 + addi r3,r3,1 + addi r4,r4,1 + bdnz L(pagecross_loop0) + cmpdi cr7,r7,0 + li r9,0 + bne+ cr7,L(align_8b) + b L(ret1) + + /* If both source1 and source2 are doubleword aligned, there is no + need for page boundary cross checks. */ + .align 4 +L(loop_eq_align_0): + ld r7,0(r3) + ld r9,0(r4) + cmpb r8,r7,r8 + cmpb r6,r7,r9 + orc. r8,r8,r6 + bne cr0,L(different1) + + cmpldi cr7,r10,8 + ble cr7,L(ret0) + addi r9,r10,-9 + + li r5,0 + srdi r9,r9,3 + addi r9,r9,1 + mtctr r9 + b L(loop_eq_align_2) + + .align 4 +L(loop_eq_align_1): + bdz L(ret0) +L(loop_eq_align_2): + ldu r7,8(r3) + addi r10,r10,-8 + ldu r9,8(r4) + cmpb r8,r7,r5 + cmpb r6,r7,r9 + orc. 
r8,r8,r6 + beq cr0,L(loop_eq_align_1) + b L(different1) + + .align 4 +L(byte_ne_0): + li r7,0 +L(byte_ne_1): + subf r9,r9,r7 + extsw r9,r9 + b L(ret1) + + .align 4 +L(byte_ne_2): + extsw r7,r9 + mr r9,r8 + b L(byte_ne_1) +L(size_reached_0): + li r10,0 +L(size_reached_1): + subf r9,r9,r10 + extsw r9,r9 + b L(ret1) +L(size_reached_2): + extsw r10,r9 + mr r9,r7 + b L(size_reached_1) +L(byte_ne_3): + extsw r7,r7 + b L(byte_ne_1) +L(byte_ne_4): + extsw r10,r9 + mr r9,r8 + b L(size_reached_1) +END(STRNCMP) +libc_hidden_builtin_def(strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S new file mode 100644 index 0000000000..6d40f30ff7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strncpy.S @@ -0,0 +1,465 @@ +/* Optimized strncpy/stpncpy implementation for PowerPC64/POWER8. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#ifdef USE_AS_STPNCPY +# ifndef STPNCPY +# define FUNC_NAME __stpncpy +# else +# define FUNC_NAME STPNCPY +# endif +#else +# ifndef STRNCPY +# define FUNC_NAME strncpy +# else +# define FUNC_NAME STRNCPY +# endif +#endif /* !USE_AS_STPNCPY */ + +#ifndef MEMSET +/* For builds without IFUNC support, local calls should be made to internal + GLIBC symbol (created by libc_hidden_builtin_def). */ +# ifdef SHARED +# define MEMSET __GI_memset +# else +# define MEMSET memset +# endif +#endif + +#define FRAMESIZE (FRAME_MIN_SIZE+48) + +/* Implements the function + + char * [r3] strncpy (char *dest [r3], const char *src [r4], size_t n [r5]) + + or + + char * [r3] stpncpy (char *dest [r3], const char *src [r4], size_t n [r5]) + + if USE_AS_STPCPY is defined. + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment. Although recent powerpc64 uses + 64K as default, the page cross handling assumes minimum page size of + 4k. */ + + .machine power7 +EALIGN (FUNC_NAME, 4, 0) + + /* Check if the [src]+15 will cross a 4K page by checking if the bit + indicating the page size changes. Basically: + + uint64_t srcin = (uint64_t)src; + uint64_t ob = srcin & 4096UL; + uint64_t nb = (srcin+15UL) & 4096UL; + if (ob ^ nb) + goto pagecross; */ + + addi r10,r4,16 + rlwinm r9,r4,0,19,19 + + /* Save some non-volatile registers on the stack. */ + std r26,-48(r1) + std r27,-40(r1) + + rlwinm r8,r10,0,19,19 + + std r28,-32(r1) + std r29,-24(r1) + + cmpld cr7,r9,r8 + + std r30,-16(r1) + std r31,-8(r1) + + /* Update CFI. */ + cfi_offset(r26, -48) + cfi_offset(r27, -40) + cfi_offset(r28, -32) + cfi_offset(r29, -24) + cfi_offset(r30, -16) + cfi_offset(r31, -8) + + beq cr7,L(unaligned_lt_16) + rldicl r9,r4,0,61 + subfic r8,r9,8 + cmpld cr7,r5,r8 + bgt cr7,L(pagecross) + + /* At this points there is 1 to 15 bytes to check and write. 
Since it could + be either from first unaligned 16 bytes access or from bulk copy, the code + uses an unrolled byte read/write instead of trying to analyze the cmpb + results. */ +L(short_path): + mr r9,r3 +L(short_path_1): + /* Return if there are no more bytes to be written. */ + cmpdi cr7,r5,0 + beq cr7,L(short_path_loop_end_1) +L(short_path_2): + /* Copy one char from src (r4) and write it to dest (r9). If it is the + end-of-string, start the null padding. Continue, otherwise. */ + lbz r10,0(r4) + cmpdi cr7,r10,0 + stb r10,0(r9) + beq cr7,L(zero_pad_start_1) + /* If there are no more bytes to be written, return. */ + cmpdi cr0,r5,1 + addi r8,r9,1 + addi r6,r5,-1 + beq cr0,L(short_path_loop_end_0) + /* Copy another char from src (r4) to dest (r9). Check again if it is + the end-of-string. If so, start the null padding. */ + lbz r10,1(r4) + cmpdi cr7,r10,0 + stb r10,1(r9) + beq cr7,L(zero_pad_start_prepare_1) + /* Eagerly decrement r5 by 3, which is the number of bytes already + written, plus one write that will be performed later on. */ + addi r10,r5,-3 + b L(short_path_loop_1) + + .align 4 +L(short_path_loop): + /* At this point, the induction variable, r5, as well as the pointers + to dest and src (r9 and r4, respectivelly) have been updated. + + Note: The registers r7 and r10 are induction variables derived from + r5. They are used to determine if the total number of writes has + been reached at every other write. + + Copy one char from src (r4) and write it to dest (r9). If it is the + end-of-string, start the null padding. Continue, otherwise. */ + lbz r8,0(r4) + addi r7,r10,-2 + cmpdi cr5,r8,0 + stb r8,0(r9) + beq cr5,L(zero_pad_start_1) + beq cr7,L(short_path_loop_end_0) + /* Copy another char from src (r4) to dest (r9). Check again if it is + the end-of-string. If so, start the null padding. 
*/ + lbz r8,1(r4) + cmpdi cr7,r8,0 + stb r8,1(r9) + beq cr7,L(zero_pad_start) + mr r10,r7 +L(short_path_loop_1): + /* This block is reached after two chars have been already written to + dest. Nevertheless, r5 (the induction variable), r9 (the pointer to + dest), and r4 (the pointer to src) have not yet been updated. + + At this point: + r5 holds the count of bytes yet to be written plus 2. + r9 points to the last two chars that were already written to dest. + r4 points to the last two chars that were already copied from src. + + The algorithm continues by decrementing r5, the induction variable, + so that it reflects the last two writes. The pointers to dest (r9) + and to src (r4) are increment by two, for the same reason. + + Note: Register r10 is another induction variable, derived from r5, + which determines if the total number of writes has been reached. */ + addic. r5,r5,-2 + addi r9,r9,2 + cmpdi cr7,r10,0 /* Eagerly check if the next write is the last. */ + addi r4,r4,2 + addi r6,r9,1 + bne cr0,L(short_path_loop) /* Check if the total number of writes + has been reached at every other + write. */ +#ifdef USE_AS_STPNCPY + mr r3,r9 + b L(short_path_loop_end) +#endif + +L(short_path_loop_end_0): +#ifdef USE_AS_STPNCPY + addi r3,r9,1 + b L(short_path_loop_end) +#endif +L(short_path_loop_end_1): +#ifdef USE_AS_STPNCPY + mr r3,r9 +#endif +L(short_path_loop_end): + /* Restore non-volatile registers. */ + ld r26,-48(r1) + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + ld r30,-16(r1) + ld r31,-8(r1) + blr + + /* This code pads the remainder of dest with NULL bytes. The algorithm + calculates the remaining size and calls memset. */ + .align 4 +L(zero_pad_start): + mr r5,r10 + mr r9,r6 +L(zero_pad_start_1): + /* At this point: + - r5 holds the number of bytes that still have to be written to + dest. + - r9 points to the position, in dest, where the first null byte + will be written. 
+ The above statements are true both when control reaches this label + from a branch or when falling through the previous lines. */ +#ifndef USE_AS_STPNCPY + mr r30,r3 /* Save the return value of strncpy. */ +#endif + /* Prepare the call to memset. */ + mr r3,r9 /* Pointer to the area to be zero-filled. */ + li r4,0 /* Byte to be written (zero). */ + + /* We delayed the creation of the stack frame, as well as the saving of + the link register, because only at this point, we are sure that + doing so is actually needed. */ + + /* Save the link register. */ + mflr r0 + std r0,16(r1) + cfi_offset(lr, 16) + + /* Create the stack frame. */ + stdu r1,-FRAMESIZE(r1) + cfi_adjust_cfa_offset(FRAMESIZE) + + bl MEMSET + nop + + /* Restore the stack frame. */ + addi r1,r1,FRAMESIZE + cfi_adjust_cfa_offset(-FRAMESIZE) + /* Restore the link register. */ + ld r0,16(r1) + mtlr r0 + +#ifndef USE_AS_STPNCPY + mr r3,r30 /* Restore the return value of strncpy, i.e.: + dest. For stpncpy, the return value is the + same as return value of memset. */ +#endif + + /* Restore non-volatile registers and return. */ + ld r26,-48(r1) + ld r27,-40(r1) + ld r28,-32(r1) + ld r29,-24(r1) + ld r30,-16(r1) + ld r31,-8(r1) + blr + + /* The common case where [src]+16 will not cross a 4K page boundary. + In this case the code fast check the first 16 bytes by using doubleword + read/compares and update destiny if neither total size or null byte + is found in destiny. */ + .align 4 +L(unaligned_lt_16): + cmpldi cr7,r5,7 + ble cr7,L(short_path) + ld r7,0(r4) + li r8,0 + cmpb r8,r7,r8 + cmpdi cr7,r8,0 + bne cr7,L(short_path_prepare_2) + addi r6,r5,-8 + std r7,0(r3) + addi r9,r3,8 + cmpldi cr7,r6,7 + addi r7,r4,8 + ble cr7,L(short_path_prepare_1_1) + ld r4,8(r4) + cmpb r8,r4,r8 + cmpdi cr7,r8,0 + bne cr7,L(short_path_prepare_2_1) + std r4,8(r3) + addi r29,r3,16 + addi r5,r5,-16 + /* Neither the null byte was found or total length was reached, + align to 16 bytes and issue a bulk copy/compare. 
*/ + b L(align_to_16b) + + /* In the case of 4k page boundary cross, the algorithm first align + the address to a doubleword, calculate a mask based on alignment + to ignore the bytes and continue using doubleword. */ + .align 4 +L(pagecross): + rldicr r11,r4,0,59 /* Align the address to 8 bytes boundary. */ + li r6,-1 /* MASK = 0xffffffffffffffffUL. */ + sldi r9,r9,3 /* Calculate padding. */ + ld r7,0(r11) /* Load doubleword from memory. */ +#ifdef __LITTLE_ENDIAN__ + sld r9,r6,r9 /* MASK = MASK << padding. */ +#else + srd r9,r6,r9 /* MASK = MASK >> padding. */ +#endif + orc r9,r7,r9 /* Mask bits that are not part of the + string. */ + li r7,0 + cmpb r9,r9,r7 /* Check for null bytes in DWORD1. */ + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + subf r8,r8,r5 /* Adjust total length. */ + cmpldi cr7,r8,8 /* Check if length was reached. */ + ble cr7,L(short_path_prepare_2) + + /* For next checks we have aligned address, so we check for more + three doublewords to make sure we can read 16 unaligned bytes + to start the bulk copy with 16 aligned addresses. */ + ld r7,8(r11) + cmpb r9,r7,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + addi r7,r8,-8 + cmpldi cr7,r7,8 + ble cr7,L(short_path_prepare_2) + ld r7,16(r11) + cmpb r9,r7,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + addi r8,r8,-16 + cmpldi cr7,r8,8 + ble cr7,L(short_path_prepare_2) + ld r8,24(r11) + cmpb r9,r8,r9 + cmpdi cr7,r9,0 + bne cr7,L(short_path_prepare_2) + + /* No null byte found in the 32 bytes readed and length not reached, + read source again using unaligned loads and store them. */ + ld r9,0(r4) + addi r29,r3,16 + addi r5,r5,-16 + std r9,0(r3) + ld r9,8(r4) + std r9,8(r3) + + /* Align source to 16 bytes and adjust destiny and size. */ +L(align_to_16b): + rldicl r9,r10,0,60 + rldicr r28,r10,0,59 + add r12,r5,r9 + subf r29,r9,r29 + + /* The bulk read/compare/copy loads two doublewords, compare and merge + in a single register for speed. 
This is an attempt to speed up the + null-checking process for bigger strings. */ + + cmpldi cr7,r12,15 + ble cr7,L(short_path_prepare_1_2) + + /* Main loop for large sizes, unrolled 2 times to get better use of + pipeline. */ + ld r8,0(28) + ld r10,8(28) + li r9,0 + cmpb r7,r8,r9 + cmpb r9,r10,r9 + or. r6,r9,r7 + bne cr0,L(short_path_prepare_2_3) + addi r5,r12,-16 + addi r4,r28,16 + std r8,0(r29) + std r10,8(r29) + cmpldi cr7,r5,15 + addi r9,r29,16 + ble cr7,L(short_path_1) + mr r11,r28 + mr r6,r29 + li r30,0 + subfic r26,r4,48 + subfic r27,r9,48 + + b L(loop_16b) + + .align 4 +L(loop_start): + ld r31,0(r11) + ld r10,8(r11) + cmpb r0,r31,r7 + cmpb r8,r10,r7 + or. r7,r0,r8 + addi r5,r5,-32 + cmpldi cr7,r5,15 + add r4,r4,r26 + add r9,r9,r27 + bne cr0,L(short_path_prepare_2_2) + add r4,r28,r4 + std r31,0(r6) + add r9,r29,r9 + std r10,8(r6) + ble cr7,L(short_path_1) + +L(loop_16b): + ld r10,16(r11) + ld r0,24(r11) + cmpb r8,r10,r30 + cmpb r7,r0,r30 + or. r7,r8,r7 + addi r12,r12,-32 + cmpldi cr7,r12,15 + addi r11,r11,32 + bne cr0,L(short_path_2) + std r10,16(r6) + addi r6,r6,32 + std r0,-8(r6) + bgt cr7,L(loop_start) + + mr r5,r12 + mr r4,r11 + mr r9,r6 + b L(short_path_1) + + .align 4 +L(short_path_prepare_1_1): + mr r5,r6 + mr r4,r7 + b L(short_path_1) +L(short_path_prepare_1_2): + mr r5,r12 + mr r4,r28 + mr r9,r29 + b L(short_path_1) +L(short_path_prepare_2): + mr r9,r3 + b L(short_path_2) +L(short_path_prepare_2_1): + mr r5,r6 + mr r4,r7 + b L(short_path_2) +L(short_path_prepare_2_2): + mr r5,r12 + mr r4,r11 + mr r9,r6 + b L(short_path_2) +L(short_path_prepare_2_3): + mr r5,r12 + mr r4,r28 + mr r9,r29 + b L(short_path_2) +L(zero_pad_start_prepare_1): + mr r5,r6 + mr r9,r8 + b L(zero_pad_start_1) +END (FUNC_NAME) + +#ifndef USE_AS_STPNCPY +libc_hidden_builtin_def (strncpy) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S new file mode 100644 index 0000000000..3eadbfb09e --- /dev/null 
+++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strnlen.S @@ -0,0 +1,433 @@ +/* Optimized strnlen implementation for POWER8 using a vmx loop. + + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* It is implemented the following heuristic: + 1. Case maxlen <= 32: align the pointer to 8 bytes to loop through + reading doublewords. Uses the POWER7 algorithm. + 2. Case maxlen > 32: check for null bytes in the first 16 bytes using + unaligned accesses. Return length if found. Otherwise: + 2.1 Case maxlen < 64: deduct the bytes previously read, align + the pointer to 16 bytes and loop through reading quadwords + until find null bytes or reach maxlen. + 2.2 Case maxlen > 64: deduct the bytes previously read, align + the pointer to 64 bytes and set up a counter to loop through + reading in strides of 64 bytes. In case it finished the loop + with null bytes not found, process the remainder bytes by + switching to the loop to heuristic in 2.1. */ + +#include <sysdep.h> + +/* Define default page size to 4KB. */ +#define PAGE_SIZE 4096 + +/* The following macros implement Power ISA v2.07 opcodes + that could not be used directly into this code to the keep + compatibility with older binutils versions. */ + +/* Move from vector register doubleword. 
*/ +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) + +/* Move to vector register doubleword. */ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) + +/* Vector Bit Permute Quadword. */ +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Vector Population Count Halfword. */ +#define VPOPCNTH(t,b) .long (0x10000743 | ((t)<<(32-11)) | ((b)<<(32-21))) + +/* Vector Count Leading Zeros Halfword. */ +#define VCLZH(t,b) .long (0x10000742 | ((t)<<(32-11)) | ((b)<<(32-21))) + + +/* int [r3] strnlen (char *s [r3], size_t maxlen [r4]) */ +/* TODO: change to power8 when minimum required binutils allows it. */ + .machine power7 +ENTRY (__strnlen) + CALL_MCOUNT 2 + dcbt 0,r3 + + cmpldi r4,32 /* Check if maxlen <= 32. */ + ble L(small_range) /* If maxlen <= 32. */ + + /* Upcoming 16 bytes unaligned accesses cannot cross the page boundary + otherwise the processor throws an memory access error. + Use following code to check there is room for such as accesses: + (((size_t) s) % PAGE_SIZE > (PAGE_SIZE - 16) + If it is disallowed then switch to the code that handles + the string when maxlen <= 32. */ + clrldi r10,r3,52 + cmpldi cr7,r10,PAGE_SIZE-16 + bgt cr7,L(small_range) /* If less than 16B of page end. */ + + /* Compute our permute constant r8. */ + li r7,0 + /* Compute a bpermd constant to move bit 0 of each word into + a halfword value, and count trailing zeros. */ +#ifdef __LITTLE_ENDIAN__ + li r8,0x2820 + oris r8,r8,0x3830 + sldi r8,r8,32 + ori r8,r8,0x0800 + oris r8,r8,0x1810 +#else + li r8,0x1018 + oris r8,r8,0x0008 + sldi r8,r8,32 + ori r8,r8,0x3038 + oris r8,r8,0x2028 +#endif + + /* maxlen > 32. Optimistically check for null bytes in the first + 16 bytes of the string using unaligned accesses. */ + ld r5,0(r3) + ld r6,8(r3) + cmpb r10,r7,r5 /* Check for null bytes in DWORD1. */ + cmpb r11,r7,r6 /* Check for null bytes in DWORD2. */ + or. 
r7,r10,r11 + bne cr0, L(early_find) /* If found null bytes. */ + + /* At this point maxlen > 32 and null bytes were not found at first + 16 bytes. Prepare for loop using VMX. */ + + /* r3 == s, r4 == maxlen. All other volatile regs are unused now. */ + + addi r5,r3,16 /* Align up, or just add the 16B we + already checked. */ + li r0,15 + and r7,r5,r0 /* Find offset into 16B alignment. */ + andc r5,r5,r0 /* Quadword align up s to the next quadword. */ + li r0,16 + subf r0,r7,r0 + subf r4,r0,r4 /* Deduct unaligned bytes from maxlen. */ + + + /* Compute offsets for vmx loads, and precompute the vbpermq + constants for both the 64B and 16B loops. */ + li r6,0 + vspltisb v0,0 + vspltisb v10,3 + lvsl v11,r6,r6 + vslb v10,v11,v10 + + cmpldi r4,64 /* Check maxlen < 64. */ + blt L(smaller) /* If maxlen < 64 */ + + /* In order to begin the 64B loop, it needs to be 64 + bytes aligned. So read quadwords until it is aligned or found null + bytes. At worst case it will be aligned after the fourth iteration, + so unroll the loop to avoid counter checking. */ + andi. r7,r5,63 /* Check if is 64 bytes aligned. */ + beq cr0,L(preloop_64B) /* If it is already 64B aligned. */ + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 /* Decrement maxlen in 16 bytes. */ + bne cr6,L(found_aligning64B) /* If found null bytes. */ + + /* Unroll 3x above code block until aligned or find null bytes. */ + andi. r7,r5,63 + beq cr0,L(preloop_64B) + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 + bne cr6,L(found_aligning64B) + + andi. r7,r5,63 + beq cr0,L(preloop_64B) + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 + bne cr6,L(found_aligning64B) + + andi. r7,r5,63 + beq cr0,L(preloop_64B) + lvx v1,r5,r6 + vcmpequb. v1,v1,v0 + addi r5,r5,16 + addi r4,r4,-16 + bne cr6,L(found_aligning64B) + + /* At this point it should be 16 bytes aligned. + Prepare for the 64B loop. 
*/ + .p2align 4 +L(preloop_64B): + /* Check if maxlen became is less than 64, therefore disallowing the + 64B loop. If it happened switch to the 16B loop code. */ + cmpldi r4,64 /* Check if maxlen < 64. */ + blt L(smaller) /* If maxlen < 64. */ + /* Set some constant values. */ + li r7,16 + li r10,32 + li r9,48 + + /* Compute the number of 64 bytes iterations needed. */ + srdi r11,r4,6 /* Compute loop count (maxlen / 64). */ + andi. r4,r4,63 /* Set maxlen the remainder (maxlen % 64). */ + mtctr r11 /* Move loop count to counter register. */ + + /* Handle maxlen > 64. Loop over the bytes in strides of 64B. */ + .p2align 4 +L(loop_64B): + lvx v1,r5,r6 /* r5 is the pointer to s. */ + lvx v2,r5,r7 + lvx v3,r5,r10 + lvx v4,r5,r9 + /* Compare the four 16B vectors to obtain the least 16 values. + Null bytes should emerge into v7, then check for null bytes. */ + vminub v5,v1,v2 + vminub v6,v3,v4 + vminub v7,v5,v6 + vcmpequb. v7,v7,v0 /* Check for null bytes. */ + addi r5,r5,64 /* Add pointer to next iteraction. */ + bne cr6,L(found_64B) /* If found null bytes. */ + bdnz L(loop_64B) /* Continue the loop if count > 0. */ + +/* Hit loop end without null match. So branch to handle the remainder. */ + + /* Prepare a 16B loop to handle two cases: + 1. If 32 > maxlen < 64. + 2. If maxlen >= 64, and reached end of the 64B loop with null + bytes not found. Thus handle the remainder bytes here. */ + .p2align 4 +L(smaller): + cmpldi r4,0 /* Check maxlen is zero. */ + beq L(done) /* If maxlen is zero. */ + + /* Place rounded up number of qw's to check into a vmx + register, and use some vector tricks to minimize + branching. */ + MTVRD(v7,r4) /* Copy maxlen from GPR to vector register. */ + vspltisb v5,1 + vspltisb v6,15 + vspltb v2,v7,7 + vaddubs v3,v5,v6 + +#ifdef __LITTLE_ENDIAN__ + vspltish v5,1 /* Compute 16 in each byte. */ +#endif + + /* Loop in 16B aligned incremements now. */ + .p2align 4 +L(loop_16B): + lvx v1,r5,r6 /* Load quadword into vector register. 
*/ + addi r5,r5,16 /* Increment address to next 16B block. */ + vor v7,v2,v2 /* Save loop count (v2) into v7. */ + vsububs v2,v2,v3 /* Subtract 16B from count, saturate at 0. */ + vminub v4,v1,v2 + vcmpequb. v4,v4,v0 /* Checking for null bytes. */ + beq cr6,L(loop_16B) /* If null bytes not found. */ + + vcmpequb v1,v1,v0 + VBPERMQ(v1,v1,v10) +#ifdef __LITTLE_ENDIAN__ + vsubuhm v2,v1,v5 /* Form a mask of trailing zeros. */ + vandc v2,v2,v1 + VPOPCNTH(v1,v2) /* Count of trailing zeros, 16 if none. */ +#else + VCLZH(v1,v1) /* Count the leading zeros, 16 if none. */ +#endif + /* Truncate to maximum allowable offset. */ + vcmpgtub v2,v1,v7 /* Compare and truncate for matches beyond + maxlen. */ + vsel v1,v1,v7,v2 /* 0-16 is now in byte 7. */ + + MFVRD(r0,v1) + addi r5,r5,-16 /* Undo speculative bump. */ + extsb r0,r0 /* Clear whatever gunk is in the high 56b. */ + add r5,r5,r0 /* Add the offset of whatever was found. */ +L(done): + subf r3,r3,r5 /* Length is equal to the offset of null byte + matched minus the pointer to s. */ + blr /* Done. */ + + /* Handle case of maxlen > 64 and found null bytes in last block + of 64 bytes read. */ + .p2align 4 +L(found_64B): + /* A zero was found. Reduce the result. */ + vcmpequb v1,v1,v0 + vcmpequb v2,v2,v0 + vcmpequb v3,v3,v0 + vcmpequb v4,v4,v0 + + /* Permute the first bit of each byte into bits 48-63. */ + VBPERMQ(v1,v1,v10) + VBPERMQ(v2,v2,v10) + VBPERMQ(v3,v3,v10) + VBPERMQ(v4,v4,v10) + + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v2,v2,v2,2 + vsldoi v3,v3,v3,4 + vsldoi v4,v4,v4,6 +#else + vsldoi v1,v1,v1,6 + vsldoi v2,v2,v2,4 + vsldoi v3,v3,v3,2 +#endif + + /* Merge the results and move to a GPR. */ + vor v1,v2,v1 + vor v2,v3,v4 + vor v4,v1,v2 + + /* Adjust address to the start of the current 64B block. */ + addi r5,r5,-64 + + MFVRD(r10,v4) +#ifdef __LITTLE_ENDIAN__ + addi r9,r10,-1 /* Form a mask from trailing zeros. 
*/ + andc r9,r9,r10 + popcntd r0,r9 /* Count the bits in the mask. */ +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + subf r5,r3,r5 + add r3,r5,r0 /* Compute final length. */ + blr /* Done. */ + + /* Handle case where null bytes were found while aligning + as a preparation for the 64B loop. */ + .p2align 4 +L(found_aligning64B): + VBPERMQ(v1,v1,v10) +#ifdef __LITTLE_ENDIAN__ + MFVRD(r10,v1) + addi r9,r10,-1 /* Form a mask from trailing zeros. */ + andc r9,r9,r10 + popcntd r0,r9 /* Count the bits in the mask. */ +#else + vsldoi v1,v1,v1,6 + MFVRD(r10,v1) + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + addi r5,r5,-16 /* Adjust address to offset of last 16 bytes + read. */ + /* Calculate length as subtracted the pointer to s of last 16 bytes + offset, added with the bytes before the match. */ + subf r5,r3,r5 + add r3,r5,r0 + blr /* Done. */ + + /* Handle case of maxlen > 32 and found a null bytes within the first + 16 bytes of s. */ + .p2align 4 +L(early_find): + bpermd r5,r8,r10 /* r8 contains the bit permute constants. */ + bpermd r6,r8,r11 + sldi r5,r5,8 + or r5,r5,r6 /* r5 should hold a 16B mask of + a potential 0. */ + cntlzd r5,r5 /* Count leading zeros. */ + addi r3,r5,-48 /* Deduct the 48 leading zeros always + present. */ + blr /* Done. */ + + /* Handle case of maxlen <= 32. Use the POWER7 algorithm. */ + .p2align 4 +L(small_range): + clrrdi r8,r3,3 /* Align the pointer to 8B. */ + li r0,0 + /* Register's content at this point: + r3 == pointer to s, r4 == maxlen, r8 == pointer to s aligned to 8B, + r7 == last acceptable address. */ + cmpldi r4,0 /* Check if maxlen is zero. */ + beq L(end_max) /* If maxlen is zero. 
*/ + + /* Calculate the last acceptable address and check for possible + addition overflow by using satured math: + r7 = r3 + r4 + r7 |= -(r7 < x) */ + add r7,r3,r4 + subfc r6,r3,r7 + subfe r9,r9,r9 + extsw r6,r9 + or r7,r7,r6 + addi r7,r7,-1 + + clrrdi r7,r7,3 /* Align to 8B address of last + acceptable address. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + ld r12,0(r8) /* Load aligned doubleword. */ + cmpb r10,r12,r0 /* Check for null bytes. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + sld r10,r10,r6 +#else + sld r10,r10,r6 + srd r10,r10,r6 +#endif /* __LITTLE_ENDIAN__ */ + cmpldi cr7,r10,0 + bne cr7,L(done_small) /* If found null byte. */ + + cmpld r8,r7 /* Check if reached maxlen. */ + beq L(end_max) /* If reached maxlen. */ + + /* Still handling case of maxlen <= 32. Read doubleword aligned until + find null bytes or reach maxlen. */ + .p2align 4 +L(loop_small): + ldu r12,8(r8) /* Load next doubleword and update r8. */ + cmpb r10,r12,r0 /* Check for null bytes. */ + cmpldi cr6,r10,0 + bne cr6,L(done_small) /* If found null bytes. */ + cmpld r8,r7 /* Check if reached maxlen. */ + bne L(loop_small) /* If it has more bytes to read. */ + mr r3,r4 /* Reached maxlen with null bytes not found. + Length is equal to maxlen. */ + blr /* Done. */ + + /* Still handling case of maxlen <= 32. Found null bytes. + Registers: r10 == match bits within doubleword, r8 == address of + last doubleword read, r3 == pointer to s, r4 == maxlen. */ + .p2align 4 +L(done_small): +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zeros. */ + addi r0,r10,-1 + andc r0,r0,r10 + popcntd r0,r0 +#else + cntlzd r0,r10 /* Count leading zeros before the match. */ +#endif + sub r3,r8,r3 /* Calculate total of bytes before the match. */ + srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */ + add r3,r3,r0 /* Length until the match. */ + cmpld r3,r4 /* Check length is greater than maxlen. */ + blelr + mr r3,r4 /* If length is greater than maxlen, return + maxlen. 
*/ + blr + + /* Handle case of reached maxlen with null bytes not found. */ + .p2align 4 +L(end_max): + mr r3,r4 /* Length is equal to maxlen. */ + blr /* Done. */ + + +END (__strnlen) +libc_hidden_def (__strnlen) +weak_alias (__strnlen, strnlen) +libc_hidden_def (strnlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S new file mode 100644 index 0000000000..8eb74853c3 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strrchr.S @@ -0,0 +1,464 @@ +/* Optimized strrchr implementation for PowerPC64/POWER7 using cmpb insn. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* char *[r3] strrchr (char *s [r3], int c [r4]) */ +/* TODO: change these to the actual instructions when the minimum required + binutils allows it. 
*/ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +#define VCLZD(r,v) .long (0x100007c2 | ((r)<<(32-11)) | ((v)<<(32-21))) +#define VPOPCNTD(r,v) .long (0x100007c3 | ((r)<<(32-11)) | ((v)<<(32-21))) +#define VADDUQM(t,a,b) .long (0x10000100 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) +#ifdef __LITTLE_ENDIAN__ +/* Find the match position from v6 and place result in r6. */ +# define CALCULATE_MATCH() \ + VBPERMQ(v6, v6, v10); \ + vsldoi v6, v6, v6, 6; \ + MFVRD(r7, v6); \ + cntlzd r6, r7; \ + subfic r6, r6, 15; +/* + * Find the first null position to mask bytes after null. + * (reg): vcmpequb result: v2 for 1st qw v3 for 2nd qw. + * Result placed at v2. + */ +# define FIND_NULL_POS(reg) \ + vspltisb v11, -1; \ + VADDUQM(v11, reg, v11); \ + vandc v11, v11, reg; \ + VPOPCNTD(v2, v11); \ + vspltb v11, v2, 15; \ + vcmpequb. v11, v11, v9; \ + blt cr6, 1f; \ + vsldoi v9, v0, v9, 1; \ + vslo v2, v2, v9; \ +1: \ + vsumsws v2, v2, v0; +#else +# define CALCULATE_MATCH() \ + VBPERMQ(v6, v6, v10); \ + MFVRD(r7, v6); \ + addi r6, r7, -1; \ + andc r6, r6, r7; \ + popcntd r6, r6; \ + subfic r6, r6, 15; +# define FIND_NULL_POS(reg) \ + VCLZD(v2, reg); \ + vspltb v11, v2, 7; \ + vcmpequb. v11, v11, v9; \ + blt cr6, 1f; \ + vsldoi v9, v0, v9, 1; \ + vsro v2, v2, v9; \ +1: \ + vsumsws v2, v2, v0; +#endif /* !__LITTLE_ENDIAN__ */ + .machine power7 +ENTRY (strrchr) + CALL_MCOUNT 2 + dcbt 0,r3 + clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ + cmpdi cr7,r4,0 + ld r12,0(r8) /* Load doubleword from memory. */ + li r9,0 /* Used to store last occurence. */ + li r0,0 /* Doubleword with null chars to use + with cmpb. */ + + rlwinm r6,r3,3,26,28 /* Calculate padding. */ + + beq cr7,L(null_match) + + /* Replicate byte to doubleword. 
*/ + insrdi r4,r4,8,48 + insrdi r4,r4,16,32 + insrdi r4,r4,32,0 + + /* r4 is changed now. If it's passed more chars, then + check for null again. */ + cmpdi cr7,r4,0 + beq cr7,L(null_match) + /* Now r4 has a doubleword of c bytes and r0 has + a doubleword of null bytes. */ + + cmpb r10,r12,r4 /* Compare each byte against c byte. */ + cmpb r11,r12,r0 /* Compare each byte against null byte. */ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r10,r10,r6 + srd r11,r11,r6 + sld r10,r10,r6 + sld r11,r11,r6 +#else + sld r10,r10,r6 + sld r11,r11,r6 + srd r10,r10,r6 + srd r11,r11,r6 +#endif + or r5,r10,r11 /* OR the results to speed things up. */ + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done) + +L(align): + andi. r12, r8, 15 + + /* Are we now aligned to a doubleword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bne cr0, L(loop) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + or r5,r10,r11 + cmpdi cr7,r5,0 + bne cr7,L(done) + b L(loop) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + .p2align 5 +L(loop): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r7,16(r8) + cmpb r10,r12,r4 + cmpb r11,r12,r0 + cmpb r6,r7,r4 + cmpb r7,r7,r0 + or r12,r10,r11 + or r5,r6,r7 + or r5,r12,r5 + cmpdi cr7,r5,0 + beq cr7,L(vector) + + /* OK, one (or both) of the doublewords contains a c/null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a c/null byte. */ + cmpdi cr6,r12,0 + addi r8,r8,-8 + bne cr6,L(done) + + /* The c/null byte must be in the second doubleword. 
Adjust the + address again and move the result of cmpb to r10 so we can calculate + the pointer. */ + + mr r10,r6 + mr r11,r7 + addi r8,r8,8 + + /* r10/r11 have the output of the cmpb instructions, that is, + 0xff in the same position as the c/null byte in the original + doubleword from the string. Use that to calculate the pointer. */ + +L(done): + /* If there are more than one 0xff in r11, find the first position of + 0xff in r11 and fill r10 with 0 from that position. */ + cmpdi cr7,r11,0 + beq cr7,L(no_null) +#ifdef __LITTLE_ENDIAN__ + addi r3,r11,-1 + andc r3,r3,r11 + popcntd r0,r3 +#else + cntlzd r0,r11 +#endif + subfic r0,r0,63 + li r6,-1 +#ifdef __LITTLE_ENDIAN__ + srd r0,r6,r0 +#else + sld r0,r6,r0 +#endif + and r10,r0,r10 +L(no_null): +#ifdef __LITTLE_ENDIAN__ + cntlzd r0,r10 /* Count leading zeros before c matches. */ + addi r3,r10,-1 + andc r3,r3,r10 + addi r10,r11,-1 + andc r10,r10,r11 + cmpld cr7,r3,r10 + bgt cr7,L(no_match) +#else + addi r3,r10,-1 /* Count trailing zeros before c matches. */ + andc r3,r3,r10 + popcntd r0,r3 + cmpld cr7,r11,r10 + bgt cr7,L(no_match) +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + subfic r0,r0,7 + add r9,r8,r0 /* Return address of the matching c byte + or null in case c was not found. */ + li r0,0 + cmpdi cr7,r11,0 /* If r11 == 0, no null's have been found. */ + beq cr7,L(align) + + .align 4 +L(no_match): + mr r3,r9 + blr + +/* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector): + addi r3, r8, 8 + /* Make sure 32B aligned. */ + andi. r10, r3, 31 + bne cr0, L(loop) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + MTVRD(v1, r4) + li r5, 16 + vspltb v1, v1, 7 + /* Compare 32 bytes in each loop. */ +L(continue): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vcmpequb v6, v1, v4 + vcmpequb v7, v1, v5 + vor v8, v2, v3 + vor v9, v6, v7 + vor v11, v8, v9 + vcmpequb. 
v11, v0, v11 + addi r3, r3, 32 + blt cr6, L(continue) + vcmpequb. v8, v0, v8 + blt cr6, L(match) + + /* One (or both) of the quadwords contains c/null. */ + vspltisb v8, 2 + vspltisb v9, 5 + /* Precompute values used for comparison. */ + vsl v9, v8, v9 /* v9 = 0x4040404040404040. */ + vaddubm v8, v9, v9 + vsldoi v8, v0, v8, 1 /* v8 = 0x80. */ + + /* Check if null is in second qw. */ + vcmpequb. v11, v0, v2 + blt cr6, L(secondqw) + + /* Null found in first qw. */ + addi r8, r3, -32 + /* Calculate the null position. */ + FIND_NULL_POS(v2) + /* Check if null is in the first byte. */ + vcmpequb. v11, v0, v2 + blt cr6, L(no_match) + vsububm v2, v8, v2 + /* Mask unwanted bytes after null. */ +#ifdef __LITTLE_ENDIAN__ + vslo v6, v6, v2 + vsro v6, v6, v2 +#else + vsro v6, v6, v2 + vslo v6, v6, v2 +#endif + vcmpequb. v11, v0, v6 + blt cr6, L(no_match) + /* Found a match before null. */ + CALCULATE_MATCH() + add r3, r8, r6 + blr + +L(secondqw): + addi r8, r3, -16 + FIND_NULL_POS(v3) + vcmpequb. v11, v0, v2 + blt cr6, L(no_match1) + vsububm v2, v8, v2 + /* Mask unwanted bytes after null. */ +#ifdef __LITTLE_ENDIAN__ + vslo v7, v7, v2 + vsro v7, v7, v2 +#else + vsro v7, v7, v2 + vslo v7, v7, v2 +#endif + vcmpequb. v11, v0, v7 + blt cr6, L(no_match1) + addi r8, r8, 16 + vor v6, v0, v7 +L(no_match1): + addi r8, r8, -16 + vcmpequb. v11, v0, v6 + blt cr6, L(no_match) + /* Found a match before null. */ + CALCULATE_MATCH() + add r3, r8, r6 + blr + +L(match): + /* One (or both) of the quadwords contains a match. */ + mr r8, r3 + vcmpequb. v8, v0, v7 + blt cr6, L(firstqw) + /* Match found in second qw. */ + addi r8, r8, 16 + vor v6, v0, v7 +L(firstqw): + addi r8, r8, -32 + CALCULATE_MATCH() + add r9, r8, r6 /* Compute final length. */ + b L(continue) +/* We are here because strrchr was called with a null byte. */ + .align 4 +L(null_match): + /* r0 has a doubleword of null bytes. */ + + cmpb r5,r12,r0 /* Compare each byte against null bytes. 
*/ + + /* Move the doublewords left and right to discard the bits that are + not part of the string and bring them back as zeros. */ +#ifdef __LITTLE_ENDIAN__ + srd r5,r5,r6 + sld r5,r5,r6 +#else + sld r5,r5,r6 + srd r5,r5,r6 +#endif + cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes + have been found. */ + bne cr7,L(done_null) + + andi. r12, r8, 15 + + /* Are we now aligned to a quadword boundary? If so, skip to + the main loop. Otherwise, go through the alignment code. */ + + bne cr0, L(loop_null) + + /* Handle WORD2 of pair. */ + ldu r12,8(r8) + cmpb r5,r12,r0 + cmpdi cr7,r5,0 + bne cr7,L(done_null) + b L(loop_null) /* We branch here (rather than falling through) + to skip the nops due to heavy alignment + of the loop below. */ + + /* Main loop to look for the end of the string. Since it's a + small loop (< 8 instructions), align it to 32-bytes. */ + .p2align 5 +L(loop_null): + /* Load two doublewords, compare and merge in a + single register for speed. This is an attempt + to speed up the null-checking process for bigger strings. */ + ld r12,8(r8) + ldu r11,16(r8) + cmpb r5,r12,r0 + cmpb r10,r11,r0 + or r6,r5,r10 + cmpdi cr7,r6,0 + beq cr7,L(vector1) + + /* OK, one (or both) of the doublewords contains a null byte. Check + the first doubleword and decrement the address in case the first + doubleword really contains a null byte. */ + + cmpdi cr6,r5,0 + addi r8,r8,-8 + bne cr6,L(done_null) + + /* The null byte must be in the second doubleword. Adjust the address + again and move the result of cmpb to r10 so we can calculate the + pointer. */ + + mr r5,r10 + addi r8,r8,8 + + /* r5 has the output of the cmpb instruction, that is, it contains + 0xff in the same position as the null byte in the original + doubleword from the string. Use that to calculate the pointer. */ +L(done_null): +#ifdef __LITTLE_ENDIAN__ + addi r0,r5,-1 + andc r0,r0,r5 + popcntd r0,r0 +#else + cntlzd r0,r5 /* Count leading zeros before the match. 
*/ +#endif + srdi r0,r0,3 /* Convert trailing zeros to bytes. */ + add r3,r8,r0 /* Return address of the matching null byte. */ + blr +/* Check the first 32B in GPR's and move to vectorized loop. */ + .p2align 5 +L(vector1): + addi r3, r8, 8 + /* Make sure 32B aligned. */ + andi. r10, r3, 31 + bne cr0, L(loop_null) + vspltisb v0, 0 + /* Precompute vbpermq constant. */ + vspltisb v10, 3 + lvsl v11, r0, r0 + vslb v10, v11, v10 + li r5, 16 + /* Compare 32 bytes in each loop. */ +L(continue1): + lvx v4, 0, r3 + lvx v5, r3, r5 + vcmpequb v2, v0, v4 + vcmpequb v3, v0, v5 + vor v8, v2, v3 + vcmpequb. v11, v0, v8 + addi r3, r3, 32 + blt cr6, L(continue1) + addi r3, r3, -32 + VBPERMQ(v2, v2, v10) + VBPERMQ(v3, v3, v10) + /* Shift each component into its correct position for merging. */ +#ifdef __LITTLE_ENDIAN__ + vsldoi v3, v3, v3, 2 +#else + vsldoi v2, v2, v2, 6 + vsldoi v3, v3, v3, 4 +#endif + /* Merge the results and move to a GPR. */ + vor v4, v3, v2 + MFVRD(r5, v4) +#ifdef __LITTLE_ENDIAN__ + addi r6, r5, -1 + andc r6, r6, r5 + popcntd r6, r6 +#else + cntlzd r6, r5 /* Count leading zeros before the match. */ +#endif + add r3, r3, r6 /* Compute final length. */ + blr +END (strrchr) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S new file mode 100644 index 0000000000..e9271898f2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power8/strspn.S @@ -0,0 +1,202 @@ +/* Optimized strspn implementation for Power8. + + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* size_t [r3] strspn (const char *string [r3], + const char *needleAccept [r4]) */ + +/* This takes a novel approach by computing a 256 bit mask whereby + each set bit implies the byte is "accepted". P8 vector hardware + has extremely efficient hardware for selecting bits from a mask. + + One might ask "why not use bpermd for short strings"? It is + so slow that its performance about matches the generic PPC64 + variant without any fancy masking, with the added expense of + making the mask. That was the first variant of this. */ + + + +#include "sysdep.h" + +#ifndef USE_AS_STRCSPN +# define USE_AS_STRCSPN 0 +# ifndef STRSPN +# define STRSPN strspn +# endif +# define INITIAL_MASK 0 +# define UPDATE_MASK(RA, RS, RB) or RA, RS, RB +#else +# ifndef STRSPN +# define STRSPN strcspn +# endif +# define INITIAL_MASK -1 +# define UPDATE_MASK(RA, RS, RB) andc RA, RS, RB +#endif + +/* Simple macro to use VSX instructions in overlapping VR's. */ +#define XXVR(insn, vrt, vra, vrb) \ + insn 32+vrt, 32+vra, 32+vrb + +/* ISA 2.07B instructions are not all defined for older binutils. + Macros are defined below for these newer instructions in order + to maintain compatibility. */ + +/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs. 
*/ +#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16))) +#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16))) + +#define VBPERMQ(t,a,b) .long (0x1000054c \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + + /* This can be updated to power8 once the minimum version of + binutils supports power8 and the above instructions. */ + .machine power7 +EALIGN(STRSPN, 4, 0) + CALL_MCOUNT 2 + + /* Generate useful constants for later on. */ + vspltisb v1, 7 + vspltisb v2, -1 + vslb v1, v1, v1 /* 0x80 to swap high bit for vbpermq. */ + vspltisb v10, 0 + vsldoi v4, v10, v2, 2 /* 0xFFFF into vr4. */ + XXVR(xxmrgld, v4, v4, v10) /* Mask for checking matches. */ + + /* Prepare to compute 256b mask. */ + addi r4, r4, -1 + li r5, INITIAL_MASK + li r6, INITIAL_MASK + li r7, INITIAL_MASK + li r8, INITIAL_MASK + +#if USE_AS_STRCSPN + /* Ensure the null character never matches by clearing ISA bit 0 in + in r5 which is the bit which will check for it in the later usage + of vbpermq. */ + srdi r5, r5, 1 +#endif + + li r11, 1 + sldi r11, r11, 63 + + /* Start interleaved Mask computation. + This will eventually or 1's into ignored bits from vbpermq. */ + lvsr v11, 0, r3 + vspltb v11, v11, 0 /* Splat shift constant. */ + + /* Build a 256b mask in r5-r8. */ + .align 4 +L(next_needle): + lbzu r9, 1(r4) + + cmpldi cr0, r9, 0 + cmpldi cr1, r9, 128 + + /* This is a little tricky. srd only uses the first 7 bits, + and if bit 7 is set, value is always 0. So, we can + effectively shift 128b in this case. */ + xori r12, r9, 0x40 /* Invert bit 6. */ + srd r10, r11, r9 /* Mask for bits 0-63. */ + srd r12, r11, r12 /* Mask for bits 64-127. */ + + beq cr0, L(start_cmp) + + /* Now, or the value into the correct GPR. */ + bge cr1,L(needle_gt128) + UPDATE_MASK (r5, r5, r10) /* 0 - 63. */ + UPDATE_MASK (r6, r6, r12) /* 64 - 127. */ + b L(next_needle) + + .align 4 +L(needle_gt128): + UPDATE_MASK (r7, r7, r10) /* 128 - 191. 
*/ + UPDATE_MASK (r8, r8, r12) /* 192 - 255. */ + b L(next_needle) + + + .align 4 +L(start_cmp): + /* Move and merge bitmap into 2 VRs. bpermd is slower on P8. */ + mr r0, r3 /* Save r3 for final length computation. */ + MTVRD (v5, r5) + MTVRD (v6, r6) + MTVRD (v7, r7) + MTVRD (v8, r8) + + /* Continue interleaved mask generation. */ +#ifdef __LITTLE_ENDIAN__ + vsrw v11, v2, v11 /* Note, shift ignores higher order bits. */ + vsplth v11, v11, 0 /* Only care about the high 16 bits of v10. */ +#else + vslw v11, v2, v11 /* Note, shift ignores higher order bits. */ + vsplth v11, v11, 1 /* Only care about the low 16 bits of v10. */ +#endif + lvx v0, 0, r3 /* Note, unaligned load ignores lower bits. */ + + /* Do the merging of the bitmask. */ + XXVR(xxmrghd, v5, v5, v6) + XXVR(xxmrghd, v6, v7, v8) + + /* Finish mask generation. */ + vand v11, v11, v4 /* Throwaway bits not in the mask. */ + + /* Compare the first 1-16B, while masking unwanted bytes. */ + clrrdi r3, r3, 4 /* Note, counts from qw boundaries. */ + vxor v9, v0, v1 /* Swap high bit. */ + VBPERMQ (v8, v5, v0) + VBPERMQ (v7, v6, v9) + vor v7, v7, v8 + vor v7, v7, v11 /* Ignore non-participating bytes. */ + vcmpequh. v8, v7, v4 + bnl cr6, L(done) + + addi r3, r3, 16 + + .align 4 +L(vec): + lvx v0, 0, r3 + addi r3, r3, 16 + vxor v9, v0, v1 /* Swap high bit. */ + VBPERMQ (v8, v5, v0) + VBPERMQ (v7, v6, v9) + vor v7, v7, v8 + vcmpequh. v8, v7, v4 + blt cr6, L(vec) + + addi r3, r3, -16 +L(done): + subf r3, r0, r3 + MFVRD (r10, v7) + +#ifdef __LITTLE_ENDIAN__ + addi r0, r10, 1 /* Count the trailing 1's. */ + andc r10, r10, r0 + popcntd r10, r10 +#else + xori r10, r10, 0xffff /* Count leading 1's by inverting. */ + addi r3, r3, -48 /* Account for the extra leading zeros. 
*/ + cntlzd r10, r10 +#endif + + add r3, r3, r10 + blr + +END(STRSPN) +libc_hidden_builtin_def (STRSPN) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies new file mode 100644 index 0000000000..fad2505ab9 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/Implies @@ -0,0 +1,2 @@ +powerpc/powerpc64/power8/fpu +powerpc/powerpc64/power8 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies new file mode 100644 index 0000000000..ae0dbaf857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies new file mode 100644 index 0000000000..f11e1bdba2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies new file mode 100644 index 0000000000..dd6bca4b36 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S new file mode 100644 index 0000000000..2dc4f6c722 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strcmp.S @@ -0,0 +1,268 @@ +/* Optimized strcmp implementation for PowerPC64/POWER9. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#ifdef __LITTLE_ENDIAN__ +#include <sysdep.h> + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +/* Implements the function + + int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) + + The implementation uses unaligned doubleword access for first 32 bytes + as in POWER8 patch and uses vectorised loops after that. */ + +/* TODO: Change this to actual instructions when minimum binutils is upgraded + to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. */ +# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +# define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +# define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Get 16 bytes for unaligned case. + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +# define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; + +/* TODO: change this to .machine power9 when the minimum required binutils + allows it. 
*/ + + .machine power7 +EALIGN (STRCMP, 4, 0) + li r0, 0 + + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 16. */ + + rldicl r7, r3, 0, 52 + rldicl r9, r4, 0, 52 + cmpldi cr7, r7, 4096-16 + bgt cr7, L(pagecross_check) + cmpldi cr5, r9, 4096-16 + bgt cr5, L(pagecross_check) + + /* For short strings up to 16 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + ld r8, 0(r3) + ld r10, 0(r4) + cmpb r12, r8, r0 + cmpb r11, r8, r10 + orc. r9, r12, r11 + bne cr0, L(different_nocmpb) + + ld r8, 8(r3) + ld r10, 8(r4) + cmpb r12, r8, r0 + cmpb r11, r8, r10 + orc. r9, r12, r11 + bne cr0, L(different_nocmpb) + + addi r7, r3, 16 + addi r4, r4, 16 + +L(align): + /* Now it has checked for first 16 bytes. */ + vspltisb v0, 0 + vspltisb v2, -1 + lvsr v6, 0, r4 /* Compute mask. */ + or r5, r4, r7 + andi. r5, r5, 0xF + beq cr0, L(aligned) + andi. r5, r7, 0xF + beq cr0, L(s1_align) + lvsr v10, 0, r7 /* Compute mask. */ + + /* Both s1 and s2 are unaligned. */ + GET16BYTES(v4, r7, v10) + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + beq cr6, L(match) + b L(different) + + /* Align s1 to qw and adjust s2 address. */ + .align 4 +L(match): + clrldi r6, r7, 60 + subfic r5, r6, 16 + add r7, r7, r5 + add r4, r4, r5 + andi. r5, r4, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2 and compares. + Loop until a mismatch or null occurs. 
*/ +L(s1_align): + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, r7, r0 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + beq cr6, L(s1_align) + b L(different) + + .align 4 +L(aligned): + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + bne cr6, L(different) + + lvx v4, 0, r7 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + addi r7, r7, 16 + addi r4, r4, 16 + beq cr6, L(aligned) + + /* Calculate and return the difference. */ +L(different): + VCTZLSBB(r6, v7) + VEXTUBRX(r5, r6, v4) + VEXTUBRX(r4, r6, v5) + subf r3, r4, r5 + extsw r3, r3 + blr + + .align 4 +L(different_nocmpb): + neg r3, r9 + and r9, r9, r3 + cntlzd r9, r9 + subfic r9, r9, 63 + srd r3, r8, r9 + srd r10, r10, r9 + rldicl r10, r10, 0, 56 + rldicl r3, r3, 0, 56 + subf r3, r10, r3 + extsw r3, r3 + blr + + .align 4 +L(pagecross_check): + subfic r9, r9, 4096 + subfic r7, r7, 4096 + cmpld cr7, r7, r9 + bge cr7, L(pagecross) + mr r7, r9 + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ +L(pagecross): + add r7, r3, r7 + subf r9, r3, r7 + mtctr r9 + + .align 4 +L(pagecross_loop): + /* Loads a byte from s1 and s2, compare if *s1 is equal to *s2 + and if *s1 is '\0'. 
*/ + lbz r9, 0(r3) + lbz r10, 0(r4) + addi r3, r3, 1 + addi r4, r4, 1 + cmplw cr7, r9, r10 + cmpdi cr5, r9, r0 + bne cr7, L(pagecross_ne) + beq cr5, L(pagecross_nullfound) + bdnz L(pagecross_loop) + b L(align) + + .align 4 +L(pagecross_ne): + extsw r3, r9 + mr r9, r10 +L(pagecross_retdiff): + subf r9, r9, r3 + extsw r3, r9 + blr + + .align 4 +L(pagecross_nullfound): + li r3, 0 + b L(pagecross_retdiff) +END (STRCMP) +libc_hidden_builtin_def (strcmp) +#else +#include <sysdeps/powerpc/powerpc64/power8/strcmp.S> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S new file mode 100644 index 0000000000..c946a5c638 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/power9/strncmp.S @@ -0,0 +1,379 @@ +/* Optimized strncmp implementation for PowerPC64/POWER9. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#ifdef __LITTLE_ENDIAN__ +#include <sysdep.h> + +/* Implements the function + + int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n) + + The implementation uses unaligned doubleword access to avoid specialized + code paths depending of data alignment for first 32 bytes and uses + vectorised loops after that. 
*/ + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +/* TODO: Change this to actual instructions when minimum binutils is upgraded + to 2.27. Macros are defined below for these newer instructions in order + to maintain compatibility. */ +# define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21))) + +# define VEXTUBRX(t,a,b) .long (0x1000070d \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +# define VCMPNEZB(t,a,b) .long (0x10000507 \ + | ((t)<<(32-11)) \ + | ((a)<<(32-16)) \ + | ((b)<<(32-21)) ) + +/* Get 16 bytes for unaligned case. + reg1: Vector to hold next 16 bytes. + reg2: Address to read from. + reg3: Permute control vector. */ +# define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vperm v8, v2, reg1, reg3; \ + vcmpequb. v8, v0, v8; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + cmplw cr6, r5, r11; \ + ble cr6, 2f; \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; + +/* TODO: change this to .machine power9 when minimum binutils + is upgraded to 2.27. */ + .machine power7 +EALIGN (STRNCMP, 4, 0) + /* Check if size is 0. */ + cmpdi cr0, r5, 0 + beq cr0, L(ret0) + li r0, 0 + + /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using + the code: + + (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) + + with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */ + rldicl r8, r3, 0, 52 + cmpldi cr7, r8, 4096-32 + bgt cr7, L(pagecross) + rldicl r9, r4, 0, 52 + cmpldi cr7, r9, 4096-32 + bgt cr7, L(pagecross) + + /* For short strings up to 32 bytes, load both s1 and s2 using + unaligned dwords and compare. */ + + ld r7, 0(r3) + ld r9, 0(r4) + li r8, 0 + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + + /* If the strings compared are equal, but size is less or equal + to 8, return 0. */ + cmpldi cr7, r5, 8 + li r9, 0 + ble cr7, L(ret1) + addi r5, r5, -8 + + ld r7, 8(r3) + ld r9, 8(r4) + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. 
r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + mr r9, r8 + ble cr7, L(ret1) + /* Update pointers and size. */ + addi r5, r5, -8 + addi r3, r3, 16 + addi r4, r4, 16 + + ld r7, 0(r3) + ld r9, 0(r4) + li r8, 0 + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + li r9, 0 + ble cr7, L(ret1) + addi r5, r5, -8 + + ld r7, 8(r3) + ld r9, 8(r4) + cmpb r8, r7, r8 + cmpb r6, r7, r9 + orc. r8, r8, r6 + bne cr0, L(different1) + cmpldi cr7, r5, 8 + mr r9, r8 + ble cr7, L(ret1) + + /* Update pointers and size. */ + addi r5, r5, -8 + addi r3, r3, 16 + addi r4, r4, 16 +L(align): + /* Now it has checked for first 32 bytes, align source1 to doubleword + and adjust source2 address. */ + vspltisb v0, 0 + vspltisb v2, -1 + or r6, r4, r3 + andi. r6, r6, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 /* Compute mask. */ + clrldi r6, r4, 60 + subfic r11, r6, 16 + andi. r6, r3, 0xF + beq cr0, L(s1_align) + /* Both s1 and s2 are unaligned. */ + GET16BYTES(v5, r4, v6) + lvsr v10, 0, r3 /* Compute mask. */ + clrldi r6, r3, 60 + subfic r11, r6, 16 + GET16BYTES(v4, r3, v10) + VCMPNEZB(v7, v5, v4) + beq cr6, L(match) + b L(different) + + /* Align s1 to qw and adjust s2 address. */ + .align 4 +L(match): + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + subf r5, r11, r5 + add r3, r3, r11 + add r4, r4, r11 + andi. r11, r4, 0xF + beq cr0, L(aligned) + lvsr v6, 0, r4 + clrldi r6, r4, 60 + subfic r11, r6, 16 + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2, checks for null + and compares them. Loops until a mismatch or null occurs. 
*/ +L(s1_align): + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + GET16BYTES(v5, r4, v6) + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + b L(s1_align) + .align 4 +L(aligned): + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + + lvx v4, 0, r3 + lvx v5, 0, r4 + VCMPNEZB(v7, v5, v4) + bne cr6, L(different) + cmpldi cr7, r5, 16 + ble cr7, L(ret0) + addi r5, r5, -16 + addi r3, r3, 16 + addi r4, r4, 16 + b L(aligned) + /* Calculate and return the difference. */ +L(different): + VCTZLSBB(r6, v7) + cmplw cr7, r5, r6 + ble cr7, L(ret0) + VEXTUBRX(r5, r6, v4) + VEXTUBRX(r4, r6, v5) + subf r3, r4, r5 + extsw r3, r3 + blr + + .align 4 +L(ret0): + li r9, 0 +L(ret1): + mr r3, r9 + blr + + /* The code now checks if r8 and r5 are different by issuing a + cmpb and shifts the result based on its output: + + leadzero = (__builtin_ffsl (z1) - 1); + leadzero = leadzero > (n-1)*8 ? 
(n-1)*8 : leadzero; + r1 = (r1 >> leadzero) & 0xFFUL; + r2 = (r2 >> leadzero) & 0xFFUL; + return r1 - r2; */ + + .align 4 +L(different1): + neg r11, r8 + sldi r5, r5, 3 + and r8, r11, r8 + addi r5, r5, -8 + cntlzd r8, r8 + subfic r8, r8, 63 + extsw r8, r8 + cmpld cr7, r8, r5 + ble cr7, L(different2) + mr r8, r5 +L(different2): + extsw r8, r8 + srd r7, r7, r8 + srd r9, r9, r8 + rldicl r3, r7, 0, 56 + rldicl r9, r9, 0, 56 + subf r9, r9, 3 + extsw r9, r9 + mr r3, r9 + blr + + /* If unaligned 16 bytes reads across a 4K page boundary, it uses + a simple byte a byte comparison until the page alignment for s1 + is reached. */ + .align 4 +L(pagecross): + lbz r7, 0(r3) + lbz r9, 0(r4) + subfic r8, r8,4095 + cmplw cr7, r9, r7 + bne cr7, L(byte_ne_3) + cmpdi cr7, r9, 0 + beq cr7, L(byte_ne_0) + addi r5, r5, -1 + subf r7, r8, r5 + subf r9, r7, r5 + addi r9, r9, 1 + mtctr r9 + b L(pagecross_loop1) + + .align 4 +L(pagecross_loop0): + beq cr7, L(ret0) + lbz r9, 0(r3) + lbz r8, 0(r4) + addi r5, r5, -1 + cmplw cr7, r9, r8 + cmpdi cr5, r9, 0 + bne cr7, L(byte_ne_2) + beq cr5, L(byte_ne_0) +L(pagecross_loop1): + cmpdi cr7, r5, 0 + addi r3, r3, 1 + addi r4, r4, 1 + bdnz L(pagecross_loop0) + cmpdi cr7, r7, 0 + li r9, 0 + bne+ cr7, L(align) + b L(ret1) + + .align 4 +L(byte_ne_0): + li r7, 0 +L(byte_ne_1): + subf r9, r9, r7 + extsw r9, r9 + b L(ret1) + + .align 4 +L(byte_ne_2): + extsw r7, r9 + mr r9, r8 + b L(byte_ne_1) +L(byte_ne_3): + extsw r7, r7 + b L(byte_ne_1) +END(STRNCMP) +libc_hidden_builtin_def(strncmp) +#else +#include <sysdeps/powerpc/powerpc64/power8/strncmp.S> +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S b/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S new file mode 100644 index 0000000000..8312f46644 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/ppc-mcount.S @@ -0,0 +1,39 @@ +/* PowerPC64-specific implementation of profiling support. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +/* We don't need to save the parameter-passing registers as gcc takes + care of that for us. Thus this function looks fairly normal. + In fact, the generic code would work for us. */ + +ENTRY(_mcount) + mflr r4 + ld r11, 0(r1) + stdu r1,-FRAME_MIN_SIZE(r1) + cfi_adjust_cfa_offset (FRAME_MIN_SIZE) + std r4, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + cfi_offset (lr, FRAME_LR_SAVE) + ld r3, FRAME_LR_SAVE(r11) + bl JUMPTARGET(__mcount_internal) + nop + ld r0, FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + mtlr r0 + addi r1,r1,FRAME_MIN_SIZE + blr +END(_mcount) + diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h b/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h new file mode 100644 index 0000000000..215e42b63f --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/register-dump.h @@ -0,0 +1,124 @@ +/* Dump registers. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/uio.h> +#include <_itoa.h> + +/* This prints out the information in the following form: */ +static const char dumpform[] = "\ +Register dump:\n\ +sr0=000000000000020% sr1=000000000000021% dar=000000000000029% dsi=000002a%\n\ +lr=000000000000024% ctr=000000000000023% gr3*=000000000000022% trap=0000028%\n\ +ccr=0000026% xer=0000025%\n\ +gr0-3: 000000000000000% 000000000000001% 000000000000002% 000000000000003%\n\ +gr4-7: 000000000000004% 000000000000005% 000000000000006% 000000000000007%\n\ +gr8-11: 000000000000008% 000000000000009% 00000000000000a% 00000000000000b%\n\ +gr12-15: 00000000000000c% 00000000000000d% 00000000000000e% 00000000000000f%\n\ +gr16-19: 000000000000010% 000000000000011% 000000000000012% 000000000000013%\n\ +gr20-23: 000000000000014% 000000000000015% 000000000000016% 000000000000017%\n\ +gr24-27: 000000000000018% 000000000000019% 00000000000001a% 00000000000001b%\n\ +gr28-31: 00000000000001c% 00000000000001d% 00000000000001e% 00000000000001f%\n\ +fscr=000000000000050%\n\ +fp0-3: 000000000000030% 000000000000031% 000000000000032% 000000000000033%\n\ +fp4-7: 000000000000034% 000000000000035% 000000000000036% 000000000000037%\n\ +fp8-11: 000000000000038% 000000000000038% 00000000000003a% 00000000000003b%\n\ +fp12-15: 00000000000003c% 00000000000003d% 00000000000003e% 00000000000003f%\n\ +fp16-19: 000000000000040% 000000000000041% 000000000000042% 000000000000043%\n\ +fp20-23: 000000000000044% 000000000000045% 000000000000046% 000000000000047%\n\ +fp24-27: 000000000000048% 000000000000049% 00000000000004a% 00000000000004b%\n\ 
+fp28-31: 00000000000004c% 00000000000004d% 00000000000004e% 00000000000004f%\n\ +"; + +/* Most of the fields are self-explanatory. 'sr0' is the next + instruction to execute, from SRR0, which may have some relationship + with the instruction that caused the exception. 'r3*' is the value + that will be returned in register 3 when the current system call + returns. 'sr1' is SRR1, bits 16-31 of which are copied from the MSR: + + 16 - External interrupt enable + 17 - Privilege level (1=user, 0=supervisor) + 18 - FP available + 19 - Machine check enable (if clear, processor locks up on machine check) + 20 - FP exception mode bit 0 (FP exceptions recoverable) + 21 - Single-step trace enable + 22 - Branch trace enable + 23 - FP exception mode bit 1 + 25 - exception prefix (if set, exceptions are taken from 0xFFFnnnnn, + otherwise from 0x000nnnnn). + 26 - Instruction address translation enabled. + 27 - Data address translation enabled. + 30 - Exception is recoverable (otherwise, don't try to return). + 31 - Little-endian mode enable. + + 'Trap' is the address of the exception: + + 00200 - Machine check exception (memory parity error, for instance) + 00300 - Data access exception (memory not mapped, see dsisr for why) + 00400 - Instruction access exception (memory not mapped) + 00500 - External interrupt + 00600 - Alignment exception (see dsisr for more information) + 00700 - Program exception (illegal/trap instruction, FP exception) + 00800 - FP unavailable (should not be seen by user code) + 00900 - Decrementer exception (for instance, SIGALRM) + 00A00 - I/O controller interface exception + 00C00 - System call exception (for instance, kill(3)). + 00E00 - FP assist exception (optional FP instructions, etc.) + + 'dar' is the memory location, for traps 00300, 00400, 00600, 00A00. 
+ 'dsisr' has the following bits under trap 00300: + 0 - direct-store error exception + 1 - no page table entry for page + 4 - memory access not permitted + 5 - trying to access I/O controller space or using lwarx/stwcx on + non-write-cached memory + 6 - access was store + 9 - data access breakpoint hit + 10 - segment table search failed to find translation (64-bit ppcs only) + 11 - I/O controller instruction not permitted + For trap 00400, the same bits are set in SRR1 instead. + For trap 00600, bits 12-31 of the DSISR set to allow emulation of + the instruction without actually having to read it from memory. +*/ + +#define xtoi(x) (x >= 'a' ? x + 10 - 'a' : x - '0') + +static void +register_dump (int fd, struct sigcontext *ctx) +{ + char buffer[sizeof(dumpform)]; + char *bufferpos; + unsigned regno; + unsigned long *regs = (unsigned long *)(ctx->regs); + + memcpy(buffer, dumpform, sizeof(dumpform)); + + /* Generate the output. */ + while ((bufferpos = memchr (buffer, '%', sizeof(dumpform)))) + { + regno = xtoi (bufferpos[-1]) | xtoi (bufferpos[-2]) << 4; + memset (bufferpos-2, '0', 3); + _itoa_word (regs[regno], bufferpos+1, 16, 0); + } + + /* Write the output. */ + write (fd, buffer, sizeof(buffer) - 1); +} + + +#define REGISTER_DUMP \ + register_dump (fd, ctx) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c b/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c new file mode 100644 index 0000000000..f3ed8ad1e7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/rtld-memset.c @@ -0,0 +1,4 @@ +/* PPCA2 has a different cache-line size than the usual 128 bytes. To avoid + using code that assumes cache-line size to be 128 bytes (with dcbz + instructions) we use the generic code instead. 
*/ +#include <string/memset.c> diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S new file mode 100644 index 0000000000..20f6cf364c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -0,0 +1,245 @@ +/* setjmp for PowerPC64. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <stap-probe.h> +#define _ASM +#ifdef __NO_VMX__ +#include <novmxsetjmp.h> +#else +#include <jmpbuf-offsets.h> +#endif + +#ifndef __NO_VMX__ + .section ".toc","aw" +.LC__dl_hwcap: +# ifdef SHARED +# if IS_IN (rtld) + /* Inside ld.so we use the local alias to avoid runtime GOT + relocations. */ + .tc _rtld_local_ro[TC],_rtld_local_ro +# else + .tc _rtld_global_ro[TC],_rtld_global_ro +# endif +# else + .tc _dl_hwcap[TC],_dl_hwcap +# endif + .section ".text" +#endif + + .machine "altivec" +ENTRY (setjmp_symbol) + CALL_MCOUNT 1 + li r4,1 /* Set second argument to 1. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (setjmp_symbol) + +#if defined SHARED && !IS_IN (rtld) && !defined __NO_VMX__ +/* When called from within libc we need a special version of _setjmp + that saves r2 since the call won't go via a plt call stub. See + bugz #269. 
__GI__setjmp is used in csu/libc-start.c when + HAVE_CLEANUP_JMP_BUF is defined. */ +ENTRY (__GI__setjmp) + std r2,FRAME_TOC_SAVE(r1) /* Save the callers TOC in the save area. */ + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (__GI__setjmp) +#endif + +ENTRY (_setjmp_symbol) + CALL_MCOUNT 1 + li r4,0 /* Set second argument to 0. */ + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (_setjmp_symbol) +libc_hidden_def (_setjmp_symbol) + +ENTRY (__sigsetjmp_symbol) + CALL_MCOUNT 2 +JUMPTARGET(GLUE(__sigsetjmp_symbol,_ent)): +#ifdef PTR_MANGLE + mr r5, r1 + PTR_MANGLE (r5, r6) + std r5,(JB_GPR1*8)(3) +#else + std r1,(JB_GPR1*8)(3) +#endif + mflr r0 +#if defined SHARED && !IS_IN (rtld) + ld r5,FRAME_TOC_SAVE(r1) /* Retrieve the callers TOC. */ + std r5,(JB_GPR2*8)(3) +#else + std r2,(JB_GPR2*8)(3) +#endif + /* setjmp probe expects longjmp first argument (8@3), second argument + (-4@4), and target address (8@0), respectively. */ + LIBC_PROBE (setjmp, 3, 8@3, -4@4, 8@0) + std r14,((JB_GPRS+0)*8)(3) + stfd fp14,((JB_FPRS+0)*8)(3) +#ifdef PTR_MANGLE + PTR_MANGLE2 (r0, r6) +#endif + std r0,(JB_LR*8)(3) + std r15,((JB_GPRS+1)*8)(3) + stfd fp15,((JB_FPRS+1)*8)(3) + mfcr r0 + std r16,((JB_GPRS+2)*8)(3) + stfd fp16,((JB_FPRS+2)*8)(3) + stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. 
*/ + std r17,((JB_GPRS+3)*8)(3) + stfd fp17,((JB_FPRS+3)*8)(3) + std r18,((JB_GPRS+4)*8)(3) + stfd fp18,((JB_FPRS+4)*8)(3) + std r19,((JB_GPRS+5)*8)(3) + stfd fp19,((JB_FPRS+5)*8)(3) + std r20,((JB_GPRS+6)*8)(3) + stfd fp20,((JB_FPRS+6)*8)(3) + std r21,((JB_GPRS+7)*8)(3) + stfd fp21,((JB_FPRS+7)*8)(3) + std r22,((JB_GPRS+8)*8)(3) + stfd fp22,((JB_FPRS+8)*8)(3) + std r23,((JB_GPRS+9)*8)(3) + stfd fp23,((JB_FPRS+9)*8)(3) + std r24,((JB_GPRS+10)*8)(3) + stfd fp24,((JB_FPRS+10)*8)(3) + std r25,((JB_GPRS+11)*8)(3) + stfd fp25,((JB_FPRS+11)*8)(3) + std r26,((JB_GPRS+12)*8)(3) + stfd fp26,((JB_FPRS+12)*8)(3) + std r27,((JB_GPRS+13)*8)(3) + stfd fp27,((JB_FPRS+13)*8)(3) + std r28,((JB_GPRS+14)*8)(3) + stfd fp28,((JB_FPRS+14)*8)(3) + std r29,((JB_GPRS+15)*8)(3) + stfd fp29,((JB_FPRS+15)*8)(3) + std r30,((JB_GPRS+16)*8)(3) + stfd fp30,((JB_FPRS+16)*8)(3) + std r31,((JB_GPRS+17)*8)(3) + stfd fp31,((JB_FPRS+17)*8)(3) +#ifndef __NO_VMX__ + ld r6,.LC__dl_hwcap@toc(r2) +# ifdef SHARED + /* Load _rtld-global._dl_hwcap. */ + ld r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r6) +# else + ld r6,0(r6) /* Load extern _dl_hwcap. */ +# endif + andis. r6,r6,(PPC_FEATURE_HAS_ALTIVEC >> 16) + beq L(no_vmx) + la r5,((JB_VRS)*8)(3) + andi. r6,r5,0xf + mfspr r0,VRSAVE + stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */ + addi r6,r5,16 + beq+ L(aligned_save_vmx) + + lvsr v0,0,r5 + lvsl v1,0,r5 + addi r6,r5,-16 + +# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \ + addi addgpr,addgpr,32; \ + vperm tmpvr,prevvr,savevr,shiftvr; \ + stvx tmpvr,0,savegpr + + /* + * We have to be careful not to corrupt the data below v20 and + * above v31. To keep things simple we just rotate both ends in + * the opposite direction to our main permute so we can use + * the common macro. 
+ */ + + /* load and rotate data below v20 */ + lvx v2,0,r5 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v20,v2,v0,v3,r5,r6) + save_misaligned_vmx(v21,v20,v0,v3,r6,r5) + save_misaligned_vmx(v22,v21,v0,v3,r5,r6) + save_misaligned_vmx(v23,v22,v0,v3,r6,r5) + save_misaligned_vmx(v24,v23,v0,v3,r5,r6) + save_misaligned_vmx(v25,v24,v0,v3,r6,r5) + save_misaligned_vmx(v26,v25,v0,v3,r5,r6) + save_misaligned_vmx(v27,v26,v0,v3,r6,r5) + save_misaligned_vmx(v28,v27,v0,v3,r5,r6) + save_misaligned_vmx(v29,v28,v0,v3,r6,r5) + save_misaligned_vmx(v30,v29,v0,v3,r5,r6) + save_misaligned_vmx(v31,v30,v0,v3,r6,r5) + /* load and rotate data above v31 */ + lvx v2,0,r6 + vperm v2,v2,v2,v1 + save_misaligned_vmx(v2,v31,v0,v3,r5,r6) + + b L(no_vmx) + +L(aligned_save_vmx): + stvx 20,0,r5 + addi r5,r5,32 + stvx 21,0,r6 + addi r6,r6,32 + stvx 22,0,r5 + addi r5,r5,32 + stvx 23,0,r6 + addi r6,r6,32 + stvx 24,0,r5 + addi r5,r5,32 + stvx 25,0,r6 + addi r6,r6,32 + stvx 26,0,r5 + addi r5,r5,32 + stvx 27,0,r6 + addi r6,r6,32 + stvx 28,0,r5 + addi r5,r5,32 + stvx 29,0,r6 + addi r6,r6,32 + stvx 30,0,r5 + stvx 31,0,r6 +L(no_vmx): +#else + li r6,0 +#endif +#if IS_IN (rtld) + li r3,0 + blr +#elif defined SHARED + b JUMPTARGET (__sigjmp_save_symbol) +#else + mflr r0 + std r0,FRAME_LR_SAVE(r1) + stdu r1,-FRAME_MIN_SIZE(r1) + cfi_adjust_cfa_offset(FRAME_MIN_SIZE) + cfi_offset(lr,FRAME_LR_SAVE) + bl JUMPTARGET (__sigjmp_save_symbol) + nop + ld r0,FRAME_MIN_SIZE+FRAME_LR_SAVE(r1) + addi r1,r1,FRAME_MIN_SIZE + mtlr r0 + blr +#endif +END (__sigsetjmp_symbol) + +#if defined SHARED && !IS_IN (rtld) && !defined __NO_VMX__ +/* When called from within libc we need a special version of __sigsetjmp + that saves r2 since the call won't go via a plt call stub. See + bugz #269. */ +ENTRY (__GI___sigsetjmp) + std r2,FRAME_TOC_SAVE(r1) /* Save the callers TOC in the save area. 
*/ + CALL_MCOUNT 1 + b JUMPTARGET (GLUE(__sigsetjmp_symbol,_ent)) +END (__GI___sigsetjmp) +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S new file mode 100644 index 0000000000..3f61d28203 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/setjmp.S @@ -0,0 +1,61 @@ +/* AltiVec (new) version of setjmp for PowerPC. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libc-symbols.h> +#include <rtld-global-offsets.h> +#include <shlib-compat.h> + +#if !IS_IN (libc) +/* Build a non-versioned object for rtld-*. */ +# define setjmp_symbol setjmp +# define _setjmp_symbol _setjmp +# define __sigsetjmp_symbol __sigsetjmp +# define __sigjmp_save_symbol __sigjmp_save +# include "setjmp-common.S" + +#else /* IS_IN (libc) */ +/* Build a versioned object for libc. 
*/ +versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4) +versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4) +versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4) +# define setjmp_symbol __vmxsetjmp +# define _setjmp_symbol __vmx_setjmp +# define __sigsetjmp_symbol __vmx__sigsetjmp +# define __sigjmp_save_symbol __vmx__sigjmp_save +# include "setjmp-common.S" +strong_alias (__vmxsetjmp, __vmx__setjmp) +strong_alias (__vmx__sigsetjmp, __setjmp) + +# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_3, GLIBC_2_3_4) +# undef setjmp_symbol +# undef _setjmp_symbol +# undef __sigsetjmp_symbol +# undef __sigjmp_save_symbol +# undef JB_SIZE +# define __NO_VMX__ +compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3) +compat_symbol (libc, __novmx_setjmp,_setjmp, GLIBC_2_3); +compat_symbol (libc, __novmx__sigsetjmp,__sigsetjmp, GLIBC_2_3) +# define setjmp_symbol __novmxsetjmp +# define _setjmp_symbol __novmx_setjmp +# define __sigsetjmp_symbol __novmx__sigsetjmp +# define __sigjmp_save_symbol __novmx__sigjmp_save +# include "setjmp-common.S" +strong_alias (__novmxsetjmp, __novmx__setjmp) +# endif +#endif /* IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h new file mode 100644 index 0000000000..e80a683e64 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/stackguard-macros.h @@ -0,0 +1,14 @@ +#include <stdint.h> + +#define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("ld %0,-28688(13)" : "=r" (x)); x; }) + +#define POINTER_CHK_GUARD \ + ({ \ + uintptr_t x; \ + asm ("ld %0,%1(13)" \ + : "=r" (x) \ + : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \ + ); \ + x; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/start.S b/REORG.TODO/sysdeps/powerpc/powerpc64/start.S new file mode 100644 index 0000000000..937c39a740 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/start.S @@ -0,0 +1,92 @@ +/* Startup code for 
programs linked with GNU libc. PowerPC64 version. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* We do not want .eh_frame info for crt1.o since crt1.o is linked + before crtbegin.o, the file defining __EH_FRAME_BEGIN__. 
*/ +#undef cfi_startproc +#define cfi_startproc +#undef cfi_endproc +#define cfi_endproc + + /* These are the various addresses we require. */ +#ifdef PIC + .section ".data.rel.ro.local","aw" +#else + .section ".rodata" +#endif + .align 3 +L(start_addresses): + .quad 0 /* was _SDA_BASE_ but not in 64-bit ABI*/ +/* function descriptors so don't need JUMPTARGET */ + .quad main + .quad __libc_csu_init + .quad __libc_csu_fini + + ASM_SIZE_DIRECTIVE(L(start_addresses)) + + .section ".toc","aw" +.L01: + .tc L(start_addresses)[TC],L(start_addresses) + .section ".text" +ENTRY(_start) + /* Save the stack pointer, in case we're statically linked under Linux. */ + mr r9,r1 + /* Set up an initial stack frame, and clear the LR. */ + clrrdi r1,r1,4 + li r0,0 + stdu r1,-128(r1) + mtlr r0 + std r0,0(r1) + + /* put the address of start_addresses in r8... ** +** PPC64 ABI uses R13 for thread local, so we leave it alone */ + ld r8,.L01@toc(r2) + + /* and continue in libc-start, in glibc. */ + b JUMPTARGET(__libc_start_main) +/* The linker needs this nop to recognize that it's OK to call via a + TOC adjusting stub. */ + nop + +END(_start) + +/* Define a symbol for the first piece of initialized data. */ + .section ".data" + .globl __data_start +__data_start: + .long 0 +weak_alias (__data_start, data_start) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S new file mode 100644 index 0000000000..cbfcc14cfe --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strchr.S @@ -0,0 +1,155 @@ +/* Optimized strchr implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how this works. */ + +/* char * [r3] strchr (const char *s [r3] , int c [r4] ) */ + +#ifndef STRCHR +# define STRCHR strchr +#endif + +ENTRY (STRCHR) + CALL_MCOUNT 2 + +#define rTMP1 r0 +#define rRTN r3 /* outgoing result */ +#define rSTR r8 /* current word pointer */ +#define rCHR r4 /* byte we're looking for, spread over the whole word */ +#define rWORD r5 /* the current word */ +#define rCLZB rCHR /* leading zero byte count */ +#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rTMP2 r9 +#define rIGN r10 /* number of bits we should ignore in the first word */ +#define rMASK r11 /* mask with the bits to ignore set to 0 */ +#define rTMP3 r12 +#define rTMP4 rIGN +#define rTMP5 rMASK + + dcbt 0,rRTN + insrdi rCHR, rCHR, 8, 48 + li rMASK, -1 + insrdi rCHR, rCHR, 16, 32 + rlwinm rIGN, rRTN, 3, 26, 28 + insrdi rCHR, rCHR, 32, 0 + lis rFEFE, -0x101 + lis r7F7F, 0x7f7f + clrrdi rSTR, rRTN, 3 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + sldi rTMP1, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP1 +/* Test the first (partial?) word. */ + ld rWORD, 0(rSTR) +#ifdef __LITTLE_ENDIAN__ + sld rMASK, rMASK, rIGN +#else + srd rMASK, rMASK, rIGN +#endif + orc rWORD, rWORD, rMASK + add rTMP1, rFEFE, rWORD + nor rTMP2, r7F7F, rWORD + and. rTMP4, rTMP1, rTMP2 + xor rTMP3, rCHR, rWORD + orc rTMP3, rTMP3, rMASK + b L(loopentry) + +/* The loop. */ + +L(loop): + ldu rWORD, 8(rSTR) + and. 
rTMP5, rTMP1, rTMP2 +/* Test for 0. */ + add rTMP1, rFEFE, rWORD /* x - 0x01010101. */ + nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */ + bne L(foundit) + and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */ +/* Start test for the bytes we're looking for. */ + xor rTMP3, rCHR, rWORD +L(loopentry): + add rTMP1, rFEFE, rTMP3 + nor rTMP2, r7F7F, rTMP3 + beq L(loop) + +/* There is a zero byte in the word, but may also be a matching byte (either + before or after the zero byte). In fact, we may be looking for a + zero byte, in which case we return a match. */ + and. rTMP5, rTMP1, rTMP2 + li rRTN, 0 + beqlr +/* At this point: + rTMP5 bytes are 0x80 for each match of c, 0 otherwise. + rTMP4 bytes are 0x80 for each match of 0, 0 otherwise. + But there may be false matches in the next most significant byte from + a true match due to carries. This means we need to recalculate the + matches using a longer method for big-endian. */ +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzd rCLZB, rTMP1 + addi rTMP2, rTMP4, -1 + andc rTMP2, rTMP2, rTMP4 + cmpld rTMP1, rTMP2 + bgtlr + subfic rCLZB, rCLZB, 64-7 +#else +/* I think we could reduce this by two instructions by keeping the "nor" + results from the loop for reuse here. See strlen.S tail. Similarly + one instruction could be pruned from L(foundit). 
*/ + and rFEFE, r7F7F, rWORD + or rTMP5, r7F7F, rWORD + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rFEFE, rFEFE, r7F7F + add rTMP1, rTMP1, r7F7F + nor rWORD, rTMP5, rFEFE + nor rTMP2, rTMP4, rTMP1 + cntlzd rCLZB, rTMP2 + cmpld rWORD, rTMP2 + bgtlr +#endif + srdi rCLZB, rCLZB, 3 + add rRTN, rSTR, rCLZB + blr + +L(foundit): +#ifdef __LITTLE_ENDIAN__ + addi rTMP1, rTMP5, -1 + andc rTMP1, rTMP1, rTMP5 + cntlzd rCLZB, rTMP1 + subfic rCLZB, rCLZB, 64-7-64 + sradi rCLZB, rCLZB, 3 +#else + and rTMP1, r7F7F, rTMP3 + or rTMP4, r7F7F, rTMP3 + add rTMP1, rTMP1, r7F7F + nor rTMP2, rTMP4, rTMP1 + cntlzd rCLZB, rTMP2 + subi rSTR, rSTR, 8 + srdi rCLZB, rCLZB, 3 +#endif + add rRTN, rSTR, rCLZB + blr +END (STRCHR) + +weak_alias (strchr, index) +libc_hidden_builtin_def (strchr) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S new file mode 100644 index 0000000000..ab5f8c231c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strcmp.S @@ -0,0 +1,180 @@ +/* Optimized strcmp implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* See strlen.s for comments on how the end-of-string testing works. 
*/ + +/* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ + +#ifndef STRCMP +# define STRCMP strcmp +#endif + +EALIGN (STRCMP, 4, 0) + CALL_MCOUNT 2 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rWORD1 r5 /* current word in s1 */ +#define rWORD2 r6 /* current word in s2 */ +#define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r10 /* bits that differ in s1 & s2 words */ +#define rTMP r11 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + dcbt 0,rSTR2 + clrldi. rTMP, rTMP, 61 + lis rFEFE, -0x101 + bne L(unaligned) + + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) + lis r7F7F, 0x7f7f + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + sldi rTMP, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP + b L(g1) + +L(g0): ldu rWORD1, 8(rSTR1) + bne cr1, L(different) + ldu rWORD2, 8(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. 
rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. 
*/ + .align 4 +L(unaligned): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + b L(u1) + +L(u0): lbzu rWORD1, 1(rSTR1) + bne- L(u4) + lbzu rWORD2, 1(rSTR2) +L(u1): cmpwi cr1, rWORD1, 0 + beq- cr1, L(u3) + cmpd rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + lbzu rWORD2, 1(rSTR2) + cmpdi cr1, rWORD1, 0 + cmpd rWORD1, rWORD2 + bne+ cr1, L(u0) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +L(u4): lbz rWORD1, -1(rSTR1) + sub rRTN, rWORD1, rWORD2 + blr +END (STRCMP) +libc_hidden_builtin_def (strcmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S new file mode 100644 index 0000000000..1466624c6a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strlen.S @@ -0,0 +1,203 @@ +/* Optimized strlen implementation for PowerPC64. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. 
For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. You get 0x80 in bytes that + match, but possibly false 0x80 matches in the next more significant + byte to a true match due to carries. For little-endian this is + of no consequence since the least significant match is the one + we're interested in, but big-endian needs method 2 to find which + byte matches. + + 2) Given a word 'x', we can test to see _which_ byte was zero by + calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). + This produces 0x80 in each byte that was zero, and 0x00 in all + the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each + byte, and the '| x' part ensures that bytes with the high bit set + produce 0x00. The addition will carry into the high bit of each byte + iff that byte had one of its low 7 bits set. We can then just see + which was the most significant bit set and divide by 8 to find how + many to add to the index. + This is from the book 'The PowerPC Compiler Writer's Guide', + by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. + + We deal with strings not aligned to a word boundary by taking the + first word and ensuring that bytes not part of the string + are treated as nonzero. To allow for memory latency, we unroll the + loop a few times, being careful to ensure that we do not read ahead + across cache line boundaries. + + Questions to answer: + 1) How long are strings passed to strlen? If they're often really long, + we should probably use cache management instructions and/or unroll the + loop more. If they're often quite short, it might be better to use + fact (2) in the inner loop than have to recalculate it. + 2) How popular are bytes with the high bit set? 
If they are very rare, + on some processors it might be useful to use the simpler expression + ~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one + ALU), but this fails when any character has its high bit set. + + Answer: + 1) Added a Data Cache Block Touch early to prefetch the first 128 + byte cache line. Adding dcbt instructions to the loop would not be + effective since most strings will be shorter than the cache line. */ + +/* Some notes on register usage: Under the SVR4 ABI, we can use registers + 0 and 3 through 12 (so long as we don't call any procedures) without + saving them. We can also use registers 14 through 31 if we save them. + We can't use r1 (it's the stack pointer), r2 nor r13 because the user + program may expect them to hold their usual value if we get sent + a signal. Integer parameters are passed in r3 through r10. + We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving + them, the others we must save. */ + +/* int [r3] strlen (char *s [r3]) */ + +#ifndef STRLEN +# define STRLEN strlen +#endif + +ENTRY (STRLEN) + CALL_MCOUNT 1 + +#define rTMP4 r0 +#define rRTN r3 /* incoming STR arg, outgoing result */ +#define rSTR r4 /* current string position */ +#define rPADN r5 /* number of padding bits we prepend to the + string to make it start at a word boundary */ +#define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rWORD1 r8 /* current string doubleword */ +#define rWORD2 r9 /* next string doubleword */ +#define rMASK r9 /* mask for first string doubleword */ +#define rTMP1 r10 +#define rTMP2 r11 +#define rTMP3 r12 + + dcbt 0,rRTN + clrrdi rSTR, rRTN, 3 + lis r7F7F, 0x7f7f + rlwinm rPADN, rRTN, 3, 26, 28 + ld rWORD1, 0(rSTR) + addi r7F7F, r7F7F, 0x7f7f + li rMASK, -1 + insrdi r7F7F, r7F7F, 32, 0 +/* We use method (2) on the first two doublewords, because rFEFE isn't + required which reduces setup overhead. 
Also gives a faster return + for small strings on big-endian due to needing to recalculate with + method (2) anyway. */ +#ifdef __LITTLE_ENDIAN__ + sld rMASK, rMASK, rPADN +#else + srd rMASK, rMASK, rPADN +#endif + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + lis rFEFE, -0x101 + add rTMP1, rTMP1, r7F7F + addi rFEFE, rFEFE, -0x101 + nor rTMP3, rTMP2, rTMP1 + and. rTMP3, rTMP3, rMASK + mtcrf 0x01, rRTN + bne L(done0) + sldi rTMP1, rFEFE, 32 + add rFEFE, rFEFE, rTMP1 +/* Are we now aligned to a doubleword boundary? */ + bt 28, L(loop) + +/* Handle second doubleword of pair. */ +/* Perhaps use method (1) here for little-endian, saving one instruction? */ + ldu rWORD1, 8(rSTR) + and rTMP1, r7F7F, rWORD1 + or rTMP2, r7F7F, rWORD1 + add rTMP1, rTMP1, r7F7F + nor. rTMP3, rTMP2, rTMP1 + bne L(done0) + +/* The loop. */ + +L(loop): + ld rWORD1, 8(rSTR) + ldu rWORD2, 16(rSTR) + add rTMP1, rFEFE, rWORD1 + nor rTMP2, r7F7F, rWORD1 + and. rTMP1, rTMP1, rTMP2 + add rTMP3, rFEFE, rWORD2 + nor rTMP4, r7F7F, rWORD2 + bne L(done1) + and. rTMP3, rTMP3, rTMP4 + beq L(loop) + +#ifndef __LITTLE_ENDIAN__ + and rTMP1, r7F7F, rWORD2 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP4, rTMP1 + b L(done0) + +L(done1): + and rTMP1, r7F7F, rWORD1 + subi rSTR, rSTR, 8 + add rTMP1, rTMP1, r7F7F + andc rTMP3, rTMP2, rTMP1 + +/* When we get to here, rSTR points to the first doubleword in the string that + contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00 + otherwise. */ +L(done0): + cntlzd rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + srdi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#else + +L(done0): + addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */ + andc rTMP1, rTMP1, rTMP3 + cntlzd rTMP1, rTMP1 /* Count bits not in the mask. 
*/ + subf rTMP3, rRTN, rSTR + subfic rTMP1, rTMP1, 64-7 + srdi rTMP1, rTMP1, 3 + add rRTN, rTMP1, rTMP3 + blr + +L(done1): + addi rTMP3, rTMP1, -1 + andc rTMP3, rTMP3, rTMP1 + cntlzd rTMP3, rTMP3 + subf rTMP1, rRTN, rSTR + subfic rTMP3, rTMP3, 64-7-64 + sradi rTMP3, rTMP3, 3 + add rRTN, rTMP1, rTMP3 + blr +#endif + +END (STRLEN) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S b/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S new file mode 100644 index 0000000000..076599804a --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/strncmp.S @@ -0,0 +1,210 @@ +/* Optimized strncmp implementation for PowerPC64. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ + +#ifndef STRNCMP +# define STRNCMP strncmp +#endif + +EALIGN (STRNCMP, 4, 0) + CALL_MCOUNT 3 + +#define rTMP2 r0 +#define rRTN r3 +#define rSTR1 r3 /* first string arg */ +#define rSTR2 r4 /* second string arg */ +#define rN r5 /* max string length */ +#define rWORD1 r6 /* current word in s1 */ +#define rWORD2 r7 /* current word in s2 */ +#define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ +#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */ +#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ +#define rBITDIF r11 /* bits that differ in s1 & s2 words */ +#define rTMP r12 + + dcbt 0,rSTR1 + or rTMP, rSTR2, rSTR1 + lis r7F7F, 0x7f7f + dcbt 0,rSTR2 + clrldi. rTMP, rTMP, 61 + cmpldi cr1, rN, 0 + lis rFEFE, -0x101 + bne L(unaligned) +/* We are doubleword aligned so set up for two loops. first a double word + loop, then fall into the byte loop if any residual. */ + srdi. rTMP, rN, 3 + clrldi rN, rN, 61 + addi rFEFE, rFEFE, -0x101 + addi r7F7F, r7F7F, 0x7f7f + cmpldi cr1, rN, 0 + beq L(unaligned) + + mtctr rTMP /* Power4 wants mtctr 1st in dispatch group. */ + ld rWORD1, 0(rSTR1) + ld rWORD2, 0(rSTR2) + sldi rTMP, rFEFE, 32 + insrdi r7F7F, r7F7F, 32, 0 + add rFEFE, rFEFE, rTMP + b L(g1) + +L(g0): + ldu rWORD1, 8(rSTR1) + bne- cr1, L(different) + ldu rWORD2, 8(rSTR2) +L(g1): add rTMP, rFEFE, rWORD1 + nor rNEG, r7F7F, rWORD1 + bdz L(tail) + and. rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + beq+ L(g0) + +/* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of gunk beyond + the end of the strings... */ + +#ifdef __LITTLE_ENDIAN__ +L(endstring): + addi rTMP2, rTMP, -1 + beq cr1, L(equal) + andc rTMP2, rTMP2, rTMP + rldimi rTMP2, rTMP2, 1, 0 + and rWORD2, rWORD2, rTMP2 /* Mask off gunk. 
*/ + and rWORD1, rWORD1, rTMP2 + cmpd cr1, rWORD1, rWORD2 + beq cr1, L(equal) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */ + neg rNEG, rBITDIF + and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */ + cntlzd rNEG, rNEG /* bitcount of the bit. */ + andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */ + sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */ + sld rWORD2, rWORD2, rNEG + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr + +#else +L(endstring): + and rTMP, r7F7F, rWORD1 + beq cr1, L(equal) + add rTMP, rTMP, r7F7F + xor. rBITDIF, rWORD1, rWORD2 + andc rNEG, rNEG, rTMP + blt- L(highbit) + cntlzd rBITDIF, rBITDIF + cntlzd rNEG, rNEG + addi rNEG, rNEG, 7 + cmpd cr1, rNEG, rBITDIF + sub rRTN, rWORD1, rWORD2 + blt- cr1, L(equal) + sradi rRTN, rRTN, 63 /* must return an int. */ + ori rRTN, rRTN, 1 + blr +L(equal): + li rRTN, 0 + blr + +L(different): + ld rWORD1, -8(rSTR1) + xor. rBITDIF, rWORD1, rWORD2 + sub rRTN, rWORD1, rWORD2 + blt- L(highbit) + sradi rRTN, rRTN, 63 + ori rRTN, rRTN, 1 + blr +L(highbit): + sradi rRTN, rWORD2, 63 + ori rRTN, rRTN, 1 + blr +#endif + +/* Oh well. In this case, we just do a byte-by-byte comparison. */ + .align 4 +L(tail): + and. 
rTMP, rTMP, rNEG + cmpd cr1, rWORD1, rWORD2 + bne- L(endstring) + addi rSTR1, rSTR1, 8 + bne- cr1, L(different) + addi rSTR2, rSTR2, 8 + cmpldi cr1, rN, 0 +L(unaligned): + mtctr rN /* Power4 wants mtctr 1st in dispatch group */ + bgt cr1, L(uz) +L(ux): + li rRTN, 0 + blr + .align 4 +L(uz): + lbz rWORD1, 0(rSTR1) + lbz rWORD2, 0(rSTR2) + nop + b L(u1) +L(u0): + lbzu rWORD2, 1(rSTR2) +L(u1): + bdz L(u3) + cmpdi cr1, rWORD1, 0 + cmpd rWORD1, rWORD2 + beq- cr1, L(u3) + lbzu rWORD1, 1(rSTR1) + bne- L(u2) + lbzu rWORD2, 1(rSTR2) + bdz L(u3) + cmpdi cr1, rWORD1, 0 + cmpd rWORD1, rWORD2 + bne- L(u3) + lbzu rWORD1, 1(rSTR1) + bne+ cr1, L(u0) + +L(u2): lbzu rWORD1, -1(rSTR1) +L(u3): sub rRTN, rWORD1, rWORD2 + blr +END (STRNCMP) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S b/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S new file mode 100644 index 0000000000..df93b4c3f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/submul_1.S @@ -0,0 +1,21 @@ +/* PowerPC64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract + the result from a second limb vector. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define USE_AS_SUBMUL +#include "addmul_1.S" diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h b/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h new file mode 100644 index 0000000000..db7c1d78b5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/sysdep.h @@ -0,0 +1,425 @@ +/* Assembly macros for 64-bit PowerPC. + Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/powerpc/sysdep.h> + +#ifdef __ASSEMBLER__ + +/* Stack frame offsets. 
*/ +#if _CALL_ELF != 2 +#define FRAME_MIN_SIZE 112 +#define FRAME_MIN_SIZE_PARM 112 +#define FRAME_BACKCHAIN 0 +#define FRAME_CR_SAVE 8 +#define FRAME_LR_SAVE 16 +#define FRAME_TOC_SAVE 40 +#define FRAME_PARM_SAVE 48 +#define FRAME_PARM1_SAVE 48 +#define FRAME_PARM2_SAVE 56 +#define FRAME_PARM3_SAVE 64 +#define FRAME_PARM4_SAVE 72 +#define FRAME_PARM5_SAVE 80 +#define FRAME_PARM6_SAVE 88 +#define FRAME_PARM7_SAVE 96 +#define FRAME_PARM8_SAVE 104 +#define FRAME_PARM9_SAVE 112 +#else +#define FRAME_MIN_SIZE 32 +#define FRAME_MIN_SIZE_PARM 96 +#define FRAME_BACKCHAIN 0 +#define FRAME_CR_SAVE 8 +#define FRAME_LR_SAVE 16 +#define FRAME_TOC_SAVE 24 +#define FRAME_PARM_SAVE 32 +#define FRAME_PARM1_SAVE 32 +#define FRAME_PARM2_SAVE 40 +#define FRAME_PARM3_SAVE 48 +#define FRAME_PARM4_SAVE 56 +#define FRAME_PARM5_SAVE 64 +#define FRAME_PARM6_SAVE 72 +#define FRAME_PARM7_SAVE 80 +#define FRAME_PARM8_SAVE 88 +#define FRAME_PARM9_SAVE 96 +#endif + +/* Support macros for CALL_MCOUNT. */ +#if _CALL_ELF == 2 +#define call_mcount_parm_offset (-64) +#else +#define call_mcount_parm_offset FRAME_PARM_SAVE +#endif + .macro SAVE_ARG NARG + .if \NARG + SAVE_ARG \NARG-1 + std 2+\NARG,call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro REST_ARG NARG + .if \NARG + REST_ARG \NARG-1 + ld 2+\NARG,FRAME_MIN_SIZE_PARM+call_mcount_parm_offset-8+8*(\NARG)(1) + .endif + .endm + + .macro CFI_SAVE_ARG NARG + .if \NARG + CFI_SAVE_ARG \NARG-1 + cfi_offset(2+\NARG,call_mcount_parm_offset-8+8*(\NARG)) + .endif + .endm + + .macro CFI_REST_ARG NARG + .if \NARG + CFI_REST_ARG \NARG-1 + cfi_restore(2+\NARG) + .endif + .endm + +/* If compiled for profiling, call `_mcount' at the start of each function. + see ppc-mcount.S for more details. 
*/ + .macro CALL_MCOUNT NARG +#ifdef PROF + mflr r0 + SAVE_ARG \NARG + std r0,FRAME_LR_SAVE(r1) + stdu r1,-FRAME_MIN_SIZE_PARM(r1) + cfi_adjust_cfa_offset(FRAME_MIN_SIZE_PARM) + cfi_offset(lr,FRAME_LR_SAVE) + CFI_SAVE_ARG \NARG + bl JUMPTARGET (_mcount) +#ifndef SHARED + nop +#endif + ld r0,FRAME_MIN_SIZE_PARM+FRAME_LR_SAVE(r1) + REST_ARG \NARG + mtlr r0 + addi r1,r1,FRAME_MIN_SIZE_PARM + cfi_adjust_cfa_offset(-FRAME_MIN_SIZE_PARM) + cfi_restore(lr) + CFI_REST_ARG \NARG +#endif + .endm + +#if _CALL_ELF != 2 + +/* Macro to prepare for calling via a function pointer. */ + .macro PPC64_LOAD_FUNCPTR PTR + ld r12,0(\PTR) + ld r2,8(\PTR) + mtctr r12 + ld r11,16(\PTR) + .endm + +#ifdef USE_PPC64_OVERLAPPING_OPD +# define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase +#else +# define OPD_ENT(name) .quad BODY_LABEL (name), .TOC.@tocbase, 0 +#endif + +#define ENTRY_1(name) \ + .type BODY_LABEL(name),@function; \ + .globl name; \ + .section ".opd","aw"; \ + .align 3; \ +name##: OPD_ENT (name); \ + .previous; + +#define DOT_LABEL(X) X +#define BODY_LABEL(X) .LY##X +#define ENTRY_2(name) \ + .type name,@function; \ + ENTRY_1(name) +#define END_2(name) \ + .size name,.-BODY_LABEL(name); \ + .size BODY_LABEL(name),.-BODY_LABEL(name); +#define LOCALENTRY(name) + +#else /* _CALL_ELF */ + +/* Macro to prepare for calling via a function pointer. */ + .macro PPC64_LOAD_FUNCPTR PTR + mr r12,\PTR + mtctr r12 + .endm + +#define DOT_LABEL(X) X +#define BODY_LABEL(X) X +#define ENTRY_2(name) \ + .globl name; \ + .type name,@function; +#define END_2(name) \ + .size name,.-name; +#define LOCALENTRY(name) \ +1: addis r2,r12,.TOC.-1b@ha; \ + addi r2,r2,.TOC.-1b@l; \ + .localentry name,.-name; + +#endif /* _CALL_ELF */ + +#define ENTRY(name) \ + .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(2); \ +BODY_LABEL(name): \ + cfi_startproc; \ + LOCALENTRY(name) + +#define EALIGN_W_0 /* No words to insert. 
*/ +#define EALIGN_W_1 nop +#define EALIGN_W_2 nop;nop +#define EALIGN_W_3 nop;nop;nop +#define EALIGN_W_4 EALIGN_W_3;nop +#define EALIGN_W_5 EALIGN_W_4;nop +#define EALIGN_W_6 EALIGN_W_5;nop +#define EALIGN_W_7 EALIGN_W_6;nop + +/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes + past a 2^alignt boundary. */ +#define EALIGN(name, alignt, words) \ + .section ".text"; \ + ENTRY_2(name) \ + .align ALIGNARG(alignt); \ + EALIGN_W_##words; \ +BODY_LABEL(name): \ + cfi_startproc; \ + LOCALENTRY(name) + +/* Local labels stripped out by the linker. */ +#undef L +#define L(x) .L##x + +#define tostring(s) #s +#define stringify(s) tostring(s) +#define XGLUE(a,b) a##b +#define GLUE(a,b) XGLUE(a,b) +#define LT_LABEL(name) GLUE(.LT,name) +#define LT_LABELSUFFIX(name,suffix) GLUE(GLUE(.LT,name),suffix) + +/* Support Traceback tables; each TB_* macro masks its own argument. */ +#define TB_ASM 0x000c000000000000 +#define TB_GLOBALLINK 0x0000800000000000 +#define TB_IS_EPROL 0x0000400000000000 +#define TB_HAS_TBOFF 0x0000200000000000 +#define TB_INT_PROC 0x0000100000000000 +#define TB_HAS_CTL 0x0000080000000000 +#define TB_TOCLESS 0x0000040000000000 +#define TB_FP_PRESENT 0x0000020000000000 +#define TB_LOG_ABORT 0x0000010000000000 +#define TB_INT_HANDL 0x0000008000000000 +#define TB_NAME_PRESENT 0x0000004000000000 +#define TB_USES_ALLOCA 0x0000002000000000 +#define TB_SAVES_CR 0x0000000200000000 +#define TB_SAVES_LR 0x0000000100000000 +#define TB_STORES_BC 0x0000000080000000 +#define TB_FIXUP 0x0000000040000000 +#define TB_FP_SAVED(fprs) (((fprs) & 0x3f) << 24) +#define TB_GPR_SAVED(gprs) (((gprs) & 0x3f) << 16) +#define TB_FIXEDPARMS(parms) (((parms) & 0xff) << 8) +#define TB_FLOATPARMS(parms) (((parms) & 0x7f) << 1) +#define TB_PARMSONSTK 0x0000000000000001 + +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define TB_DEFAULT TB_ASM | TB_HAS_TBOFF | TB_NAME_PRESENT + +#define TRACEBACK(name) \ +LT_LABEL(name): ; \ + .long 0 ; \ + .quad TB_DEFAULT ; \ + .long LT_LABEL(name)-BODY_LABEL(name) ; \ + .short 
LT_LABELSUFFIX(name,_name_end)-LT_LABELSUFFIX(name,_name_start) ; \ +LT_LABELSUFFIX(name,_name_start): ;\ + .ascii stringify(name) ; \ +LT_LABELSUFFIX(name,_name_end): ; \ + .align 2 ; + +#define TRACEBACK_MASK(name,mask) \ +LT_LABEL(name): ; \ + .long 0 ; \ + .quad TB_DEFAULT | mask ; \ + .long LT_LABEL(name)-BODY_LABEL(name) ; \ + .short LT_LABELSUFFIX(name,_name_end)-LT_LABELSUFFIX(name,_name_start) ; \ +LT_LABELSUFFIX(name,_name_start): ;\ + .ascii stringify(name) ; \ +LT_LABELSUFFIX(name,_name_end): ; \ + .align 2 ; + +/* END generates Traceback tables */ +#undef END +#define END(name) \ + cfi_endproc; \ + TRACEBACK(name) \ + END_2(name) + +/* This form supports more informative traceback tables */ +#define END_GEN_TB(name,mask) \ + cfi_endproc; \ + TRACEBACK_MASK(name,mask) \ + END_2(name) + +#if !IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) +# define ABORT_TRANSACTION \ + cmpdi 13,0; \ + beq 1f; \ + lwz 0,TM_CAPABLE(13); \ + cmpwi 0,0; \ + beq 1f; \ + li 11,_ABORT_SYSCALL; \ + tabort. 11; \ + .align 4; \ +1: +#else +# define ABORT_TRANSACTION +#endif + +#define DO_CALL(syscall) \ + ABORT_TRANSACTION \ + li 0,syscall; \ + sc + +/* ppc64 is always PIC */ +#undef JUMPTARGET +#define JUMPTARGET(name) DOT_LABEL(name) + +#define PSEUDO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#ifdef SHARED +#define TAIL_CALL_SYSCALL_ERROR \ + b JUMPTARGET(__syscall_error) +#else +/* Static version might be linked into a large app with a toc exceeding + 64k. We can't put a toc adjusting stub on a plain branch, so can't + tail call __syscall_error. 
*/ +#define TAIL_CALL_SYSCALL_ERROR \ + .ifdef .Local_syscall_error; \ + b .Local_syscall_error; \ + .else; \ +.Local_syscall_error: \ + mflr 0; \ + std 0,FRAME_LR_SAVE(1); \ + stdu 1,-FRAME_MIN_SIZE(1); \ + cfi_adjust_cfa_offset(FRAME_MIN_SIZE); \ + cfi_offset(lr,FRAME_LR_SAVE); \ + bl JUMPTARGET(__syscall_error); \ + nop; \ + ld 0,FRAME_MIN_SIZE+FRAME_LR_SAVE(1); \ + addi 1,1,FRAME_MIN_SIZE; \ + cfi_adjust_cfa_offset(-FRAME_MIN_SIZE); \ + mtlr 0; \ + cfi_restore(lr); \ + blr; \ + .endif +#endif + +#define PSEUDO_RET \ + bnslr+; \ + TAIL_CALL_SYSCALL_ERROR + +#define ret PSEUDO_RET + +#undef PSEUDO_END +#define PSEUDO_END(name) \ + END (name) + +#define PSEUDO_NOERRNO(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_NOERRNO \ + blr + +#define ret_NOERRNO PSEUDO_RET_NOERRNO + +#undef PSEUDO_END_NOERRNO +#define PSEUDO_END_NOERRNO(name) \ + END (name) + +#define PSEUDO_ERRVAL(name, syscall_name, args) \ + .section ".text"; \ + ENTRY (name) \ + DO_CALL (SYS_ify (syscall_name)); + +#define PSEUDO_RET_ERRVAL \ + blr + +#define ret_ERRVAL PSEUDO_RET_ERRVAL + +#undef PSEUDO_END_ERRVAL +#define PSEUDO_END_ERRVAL(name) \ + END (name) + +#else /* !__ASSEMBLER__ */ + +#if _CALL_ELF != 2 + +#define PPC64_LOAD_FUNCPTR(ptr) \ + "ld 12,0(" #ptr ");\n" \ + "ld 2,8(" #ptr ");\n" \ + "mtctr 12;\n" \ + "ld 11,16(" #ptr ");" + +#ifdef USE_PPC64_OVERLAPPING_OPD +# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase;" +#else +# define OPD_ENT(name) ".quad " BODY_PREFIX #name ", .TOC.@tocbase, 0;" +#endif + +#define ENTRY_1(name) \ + ".type " BODY_PREFIX #name ",@function;\n" \ + ".globl " #name ";\n" \ + ".pushsection \".opd\",\"aw\";\n" \ + ".align 3;\n" \ +#name ":\n" \ + OPD_ENT (name) "\n" \ + ".popsection;" + +#define DOT_PREFIX "" +#define BODY_PREFIX ".LY" +#define ENTRY_2(name) \ + ".type " #name ",@function;\n" \ + ENTRY_1(name) +#define END_2(name) \ + ".size " #name ",.-" BODY_PREFIX 
#name ";\n" \ + ".size " BODY_PREFIX #name ",.-" BODY_PREFIX #name ";" +#define LOCALENTRY(name) + +#else /* _CALL_ELF */ + +#define PPC64_LOAD_FUNCPTR(ptr) \ + "mr 12," #ptr ";\n" \ + "mtctr 12;" + +#define DOT_PREFIX "" +#define BODY_PREFIX "" +#define ENTRY_2(name) \ + ".type " #name ",@function;\n" \ + ".globl " #name ";" +#define END_2(name) \ + ".size " #name ",.-" #name ";" +#define LOCALENTRY(name) \ + "1: addis 2,12,.TOC.-1b@ha;\n" \ + "addi 2,2,.TOC.-1b@l;\n" \ + ".localentry " #name ",.-" #name ";" + +#endif /* _CALL_ELF */ + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h b/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h new file mode 100644 index 0000000000..42a95ec5c1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/tls-macros.h @@ -0,0 +1,44 @@ +/* Include sysdeps/powerpc/tls-macros.h for __TLS_CALL_CLOBBERS */ +#include_next "tls-macros.h" + +/* PowerPC64 Local Exec TLS access. */ +#define TLS_LE(x) \ + ({ int * __result; \ + asm ("addis %0,13," #x "@tprel@ha\n\t" \ + "addi %0,%0," #x "@tprel@l" \ + : "=b" (__result) ); \ + __result; \ + }) +/* PowerPC64 Initial Exec TLS access. */ +#define TLS_IE(x) \ + ({ int * __result; \ + asm ("ld %0," #x "@got@tprel(2)\n\t" \ + "add %0,%0," #x "@tls" \ + : "=r" (__result) ); \ + __result; \ + }) + +#define __TLS_GET_ADDR "__tls_get_addr" + +/* PowerPC64 Local Dynamic TLS access. */ +#define TLS_LD(x) \ + ({ int * __result; \ + asm ("addi 3,2," #x "@got@tlsld\n\t" \ + "bl " __TLS_GET_ADDR "\n\t" \ + "nop \n\t" \ + "addis %0,3," #x "@dtprel@ha\n\t" \ + "addi %0,%0," #x "@dtprel@l" \ + : "=b" (__result) : \ + : "3", __TLS_CALL_CLOBBERS); \ + __result; \ + }) +/* PowerPC64 General Dynamic TLS access. 
*/ +#define TLS_GD(x) \ + ({ register int *__result __asm__ ("r3"); \ + asm ("addi 3,2," #x "@got@tlsgd\n\t" \ + "bl " __TLS_GET_ADDR "\n\t" \ + "nop " \ + : "=r" (__result) : \ + : __TLS_CALL_CLOBBERS); \ + __result; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h b/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h new file mode 100644 index 0000000000..b25040b9f0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64/tst-audit.h @@ -0,0 +1,33 @@ +/* Definitions for testing PLT entry/exit auditing. PowerPC64 version. + + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if _CALL_ELF != 2 +#define pltenter la_ppc64_gnu_pltenter +#define pltexit la_ppc64_gnu_pltexit +#define La_regs La_ppc64_regs +#define La_retval La_ppc64_retval +#define int_retval lrv_r3 +#else +#define pltenter la_ppc64v2_gnu_pltenter +#define pltexit la_ppc64v2_gnu_pltexit +#define La_regs La_ppc64v2_regs +#define La_retval La_ppc64v2_retval +#define int_retval lrv_r3 +#endif diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/Implies new file mode 100644 index 0000000000..a105a325f7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/Implies @@ -0,0 +1 @@ +powerpc/powerpc64 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/Implies new file mode 100644 index 0000000000..c1f617b7da --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/multiarch/Implies new file mode 100644 index 0000000000..8d6531a174 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/multiarch/Implies new file mode 100644 index 0000000000..30edcf7f9d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/Implies new file mode 100644 index 0000000000..eedef823d5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/Implies new file mode 100644 index 
0000000000..8447198fbc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/multiarch/Implies new file mode 100644 index 0000000000..7fd86fdf87 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/multiarch/Implies new file mode 100644 index 0000000000..1fc7b7cd39 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power7/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power7/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/Implies new file mode 100644 index 0000000000..3c37351dcc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/Implies new file mode 100644 index 0000000000..ae0dbaf857 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/multiarch/Implies new file mode 100644 index 0000000000..f11e1bdba2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/multiarch/Implies new file mode 100644 index 0000000000..dd6bca4b36 --- /dev/null +++ 
b/REORG.TODO/sysdeps/powerpc/powerpc64le/power8/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power8/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/Implies new file mode 100644 index 0000000000..efe5d4193c --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9 diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/Implies new file mode 100644 index 0000000000..3633114b47 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9/fpu/ diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/multiarch/Implies new file mode 100644 index 0000000000..105948092d --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/fpu/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9/fpu/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/multiarch/Implies b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/multiarch/Implies new file mode 100644 index 0000000000..02be30cfd0 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/powerpc64le/power9/multiarch/Implies @@ -0,0 +1 @@ +powerpc/powerpc64/power9/multiarch diff --git a/REORG.TODO/sysdeps/powerpc/preconfigure b/REORG.TODO/sysdeps/powerpc/preconfigure new file mode 100644 index 0000000000..7de2eafd52 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/preconfigure @@ -0,0 +1,20 @@ +# preconfigure fragment for powerpc. + +case "$machine" in +powerpc64le) + base_machine=powerpc machine=powerpc/powerpc64le + ;; +powerpc64*) + base_machine=powerpc machine=powerpc/powerpc64 + ;; +powerpc*) + # Check for e500. + $CC $CFLAGS $CPPFLAGS -E -dM -xc /dev/null > conftest.i + if grep -q __NO_FPRS__ conftest.i && ! 
grep -q _SOFT_FLOAT conftest.i; then + base_machine=powerpc machine=powerpc/powerpc32/e500 + else + base_machine=powerpc machine=powerpc/powerpc32 + fi + rm -f conftest.i + ;; +esac diff --git a/REORG.TODO/sysdeps/powerpc/rtld-global-offsets.sym b/REORG.TODO/sysdeps/powerpc/rtld-global-offsets.sym new file mode 100644 index 0000000000..f5ea5a1466 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/rtld-global-offsets.sym @@ -0,0 +1,8 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_HWCAP_OFFSET rtld_global_ro_offsetof (_dl_hwcap) +RTLD_GLOBAL_RO_DL_HWCAP2_OFFSET rtld_global_ro_offsetof (_dl_hwcap2) diff --git a/REORG.TODO/sysdeps/powerpc/sched_cpucount.c b/REORG.TODO/sysdeps/powerpc/sched_cpucount.c new file mode 100644 index 0000000000..13d17ac555 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sched_cpucount.c @@ -0,0 +1,22 @@ +/* Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifdef _ARCH_PWR5 +# define POPCNT(l) __builtin_popcountl (l) +#endif + +#include <posix/sched_cpucount.c> diff --git a/REORG.TODO/sysdeps/powerpc/sigjmp.c b/REORG.TODO/sysdeps/powerpc/sigjmp.c new file mode 100644 index 0000000000..6d593a0992 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sigjmp.c @@ -0,0 +1,39 @@ +/* Copyright (C) 1992-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Versioned copy of sysdeps/generic/sigjmp.c modified for AltiVec support. */ + +#include <shlib-compat.h> +#include <stddef.h> +#include <setjmp.h> +#include <signal.h> + +/* This function is called by the `sigsetjmp' macro + before doing a `__setjmp' on ENV[0].__jmpbuf. + Always return zero. 
*/ + +int +__vmx__sigjmp_save (sigjmp_buf env, int savemask) +{ + env[0].__mask_was_saved = (savemask && + __sigprocmask (SIG_BLOCK, (sigset_t *) NULL, + &env[0].__saved_mask) == 0); + + return 0; +} + +strong_alias (__vmx__sigjmp_save,__sigjmp_save) diff --git a/REORG.TODO/sysdeps/powerpc/soft-fp/sfp-machine.h b/REORG.TODO/sysdeps/powerpc/soft-fp/sfp-machine.h new file mode 100644 index 0000000000..d92a90e3e2 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/soft-fp/sfp-machine.h @@ -0,0 +1,114 @@ +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* Someone please check this. */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define _FP_TININESS_AFTER_ROUNDING 0 + +#if defined __NO_FPRS__ && !defined _SOFT_FLOAT + +/* Exception flags. We use the bit positions of the appropriate bits + in the FPEFSCR. 
*/ + +# include <fenv_libc.h> +# include <sysdep.h> +# include <sys/prctl.h> + +int __feraiseexcept_soft (int); +libc_hidden_proto (__feraiseexcept_soft) + +# define FP_EX_INEXACT SPEFSCR_FINXS +# define FP_EX_INVALID SPEFSCR_FINVS +# define FP_EX_DIVZERO SPEFSCR_FDBZS +# define FP_EX_UNDERFLOW SPEFSCR_FUNFS +# define FP_EX_OVERFLOW SPEFSCR_FOVFS + +# define _FP_DECL_EX \ + int _spefscr __attribute__ ((unused)), _ftrapex __attribute__ ((unused)) = 0 +# define FP_INIT_ROUNDMODE \ + do \ + { \ + int _r; \ + INTERNAL_SYSCALL_DECL (_err); \ + \ + _spefscr = fegetenv_register (); \ + _r = INTERNAL_SYSCALL (prctl, _err, 2, PR_GET_FPEXC, &_ftrapex); \ + if (INTERNAL_SYSCALL_ERROR_P (_r, _err)) \ + _ftrapex = 0; \ + } \ + while (0) +# define FP_INIT_EXCEPTIONS /* Empty. */ + +# define FP_HANDLE_EXCEPTIONS __feraiseexcept_soft (_fex) +# define FP_ROUNDMODE (_spefscr & 0x3) + +/* Not correct in general, but sufficient for the uses in soft-fp. */ +# define FP_TRAPPING_EXCEPTIONS (_ftrapex & PR_FP_EXC_UND \ + ? FP_EX_UNDERFLOW \ + : 0) + +#else + +/* Exception flags. We use the bit positions of the appropriate bits + in the FPSCR, which also correspond to the FE_* bits. This makes + everything easier ;-). 
*/ +# define FP_EX_INVALID (1 << (31 - 2)) +# define FP_EX_OVERFLOW (1 << (31 - 3)) +# define FP_EX_UNDERFLOW (1 << (31 - 4)) +# define FP_EX_DIVZERO (1 << (31 - 5)) +# define FP_EX_INEXACT (1 << (31 - 6)) + +# define FP_HANDLE_EXCEPTIONS __simulate_exceptions (_fex) +# define FP_ROUNDMODE __sim_round_mode_thread +# define FP_TRAPPING_EXCEPTIONS \ + (~__sim_disabled_exceptions_thread & 0x3e000000) + +#endif + +extern __thread int __sim_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_exceptions_thread, tls_model ("initial-exec")); +extern __thread int __sim_disabled_exceptions_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_disabled_exceptions_thread, + tls_model ("initial-exec")); +extern __thread int __sim_round_mode_thread attribute_tls_model_ie; +libc_hidden_tls_proto (__sim_round_mode_thread, tls_model ("initial-exec")); + +extern void __simulate_exceptions (int x) attribute_hidden; diff --git a/REORG.TODO/sysdeps/powerpc/sotruss-lib.c b/REORG.TODO/sysdeps/powerpc/sotruss-lib.c new file mode 100644 index 0000000000..e5274241a5 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sotruss-lib.c @@ -0,0 +1,69 @@ +/* PowerPC specific sotruss-lib functions. + Copyright (C) 2013-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define HAVE_ARCH_PLTENTER +#define HAVE_ARCH_PLTEXIT + +#include <elf/sotruss-lib.c> + +#ifdef __powerpc64__ +# if _CALL_ELF != 2 +# define LA_PPC_REGS La_ppc64_regs +# define LA_PPC_RETVAL La_ppc64_retval +# define LA_PPC_GNU_PLTENTER la_ppc64_gnu_pltenter +# define LA_PPC_GNU_PLTEXIT la_ppc64_gnu_pltexit +# else +# define LA_PPC_REGS La_ppc64v2_regs +# define LA_PPC_RETVAL La_ppc64v2_retval +# define LA_PPC_GNU_PLTENTER la_ppc64v2_gnu_pltenter +# define LA_PPC_GNU_PLTEXIT la_ppc64v2_gnu_pltexit +# endif +# else +# define LA_PPC_REGS La_ppc32_regs +# define LA_PPC_RETVAL La_ppc32_retval +# define LA_PPC_GNU_PLTENTER la_ppc32_gnu_pltenter +# define LA_PPC_GNU_PLTEXIT la_ppc32_gnu_pltexit +#endif + +ElfW(Addr) +LA_PPC_GNU_PLTENTER (ElfW(Sym) *sym __attribute__ ((unused)), + unsigned int ndx __attribute__ ((unused)), + uintptr_t *refcook, uintptr_t *defcook, + LA_PPC_REGS *regs, unsigned int *flags, + const char *symname, long int *framesizep) +{ + print_enter (refcook, defcook, symname, + regs->lr_reg[0], regs->lr_reg[1], regs->lr_reg[2], *flags); + + /* No need to copy anything, we will not need the parameters in any case. */ + *framesizep = 0; + + return sym->st_value; +} + +unsigned int +LA_PPC_GNU_PLTEXIT (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, + const struct LA_PPC_REGS *inregs, + struct LA_PPC_RETVAL *outregs, const char *symname) +{ + print_exit (refcook, defcook, symname, outregs->lrv_r3); + + return 0; +} diff --git a/REORG.TODO/sysdeps/powerpc/stackinfo.h b/REORG.TODO/sysdeps/powerpc/stackinfo.h new file mode 100644 index 0000000000..964ab77d56 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/stackinfo.h @@ -0,0 +1,38 @@ +/* Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains a bit of information about the stack allocation + of the processor. */ + +#ifndef _STACKINFO_H +#define _STACKINFO_H 1 + +#include <elf.h> + +/* On PPC the stack grows down. */ +#define _STACK_GROWS_DOWN 1 + +#if __WORDSIZE == 64 +/* PPC64 doesn't need an executable stack and doesn't need PT_GNU_STACK + * to make the stack nonexecutable. */ +# define DEFAULT_STACK_PERMS (PF_R|PF_W) +#else +/* PF_X can be overridden if PT_GNU_STACK is present but is presumed absent. */ +# define DEFAULT_STACK_PERMS (PF_R|PF_W|PF_X) +#endif + +#endif /* stackinfo.h */ diff --git a/REORG.TODO/sysdeps/powerpc/sys/platform/ppc.h b/REORG.TODO/sysdeps/powerpc/sys/platform/ppc.h new file mode 100644 index 0000000000..4a0c015766 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sys/platform/ppc.h @@ -0,0 +1,146 @@ +/* Facilities specific to the PowerPC architecture + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SYS_PLATFORM_PPC_H +#define _SYS_PLATFORM_PPC_H 1 + +#include <features.h> +#include <stdint.h> +#include <bits/ppc.h> + +/* Read the Time Base Register. */ +static __inline__ uint64_t +__ppc_get_timebase (void) +{ +#if __GNUC_PREREQ (4, 8) + return __builtin_ppc_get_timebase (); +#else +# ifdef __powerpc64__ + uint64_t __tb; + /* "volatile" is necessary here, because the user expects this assembly + isn't moved after an optimization. */ + __asm__ volatile ("mfspr %0, 268" : "=r" (__tb)); + return __tb; +# else /* not __powerpc64__ */ + uint32_t __tbu, __tbl, __tmp; \ + __asm__ volatile ("0:\n\t" + "mftbu %0\n\t" + "mftbl %1\n\t" + "mftbu %2\n\t" + "cmpw %0, %2\n\t" + "bne- 0b" + : "=r" (__tbu), "=r" (__tbl), "=r" (__tmp)); + return (((uint64_t) __tbu << 32) | __tbl); +# endif /* not __powerpc64__ */ +#endif +} + +/* The following functions provide hints about the usage of shared processor + resources, as defined in ISA 2.06 and newer. */ + +/* Provides a hint that performance will probably be improved if shared + resources dedicated to the executing processor are released for use by other + processors. */ +static __inline__ void +__ppc_yield (void) +{ + __asm__ volatile ("or 27,27,27"); +} + +/* Provides a hint that performance will probably be improved if shared + resources dedicated to the executing processor are released until + all outstanding storage accesses to caching-inhibited storage have been + completed. 
*/ +static __inline__ void +__ppc_mdoio (void) +{ + __asm__ volatile ("or 29,29,29"); +} + +/* Provides a hint that performance will probably be improved if shared + resources dedicated to the executing processor are released until all + outstanding storage accesses to cacheable storage for which the data is not + in the cache have been completed. */ +static __inline__ void +__ppc_mdoom (void) +{ + __asm__ volatile ("or 30,30,30"); +} + + +/* ISA 2.05 and beyond support the Program Priority Register (PPR) to adjust + thread priorities based on lock acquisition, wait and release. The ISA + defines the use of form 'or Rx,Rx,Rx' as the way to modify the PRI field. + The unprivileged priorities are: + Rx = 1 (low) + Rx = 2 (medium) + Rx = 6 (medium-low/normal) + The 'or' instruction form is a nop in previous hardware, so it is safe to + use unguarded. The default value is 'medium'. + */ + +static __inline__ void +__ppc_set_ppr_med (void) +{ + __asm__ volatile ("or 2,2,2"); +} + +static __inline__ void +__ppc_set_ppr_med_low (void) +{ + __asm__ volatile ("or 6,6,6"); +} + +static __inline__ void +__ppc_set_ppr_low (void) +{ + __asm__ volatile ("or 1,1,1"); +} + +/* Power ISA 2.07 (Book II, Chapter 3) extends the priorities that can be set + to the Program Priority Register (PPR). The form 'or Rx,Rx,Rx' is used to + modify the PRI field of the PPR, the same way as described above. + The new priority levels are: + Rx = 31 (very low) + Rx = 5 (medium high) + Any program can set the priority to very low, low, medium low, and medium, + as these are unprivileged. + The medium high priority, on the other hand, is privileged, and may only be + set during certain time intervals by problem-state programs. If the program + priority is medium high when the time interval expires or if an attempt is + made to set the priority to medium high when it is not allowed, the PRI + field is set to medium. 
+ */ + +#ifdef _ARCH_PWR8 + +static __inline__ void +__ppc_set_ppr_very_low (void) +{ + __asm__ volatile ("or 31,31,31"); +} + +static __inline__ void +__ppc_set_ppr_med_high (void) +{ + __asm__ volatile ("or 5,5,5"); +} + +#endif + +#endif /* sys/platform/ppc.h */ diff --git a/REORG.TODO/sysdeps/powerpc/sysdep.h b/REORG.TODO/sysdeps/powerpc/sysdep.h new file mode 100644 index 0000000000..f07b959eee --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/sysdep.h @@ -0,0 +1,189 @@ +/* Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * Powerpc Feature masks for the Aux Vector Hardware Capabilities (AT_HWCAP). + * This entry is copied to _dl_hwcap or rtld_global._dl_hwcap during startup. + */ +#define _SYSDEPS_SYSDEP_H 1 +#include <bits/hwcap.h> +#ifdef ENABLE_LOCK_ELISION +#include <tls.h> +#include <htm.h> +#endif + +#define PPC_FEATURE_970 (PPC_FEATURE_POWER4 + PPC_FEATURE_HAS_ALTIVEC) + +#ifdef __ASSEMBLER__ + +/* Symbolic names for the registers. The only portable way to write asm + code is to use number but this produces really unreadable code. + Therefore these symbolic names. */ + +/* Integer registers. 
*/ +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +/* Floating-point registers. */ +#define fp0 0 +#define fp1 1 +#define fp2 2 +#define fp3 3 +#define fp4 4 +#define fp5 5 +#define fp6 6 +#define fp7 7 +#define fp8 8 +#define fp9 9 +#define fp10 10 +#define fp11 11 +#define fp12 12 +#define fp13 13 +#define fp14 14 +#define fp15 15 +#define fp16 16 +#define fp17 17 +#define fp18 18 +#define fp19 19 +#define fp20 20 +#define fp21 21 +#define fp22 22 +#define fp23 23 +#define fp24 24 +#define fp25 25 +#define fp26 26 +#define fp27 27 +#define fp28 28 +#define fp29 29 +#define fp30 30 +#define fp31 31 + +/* Condition code registers. */ +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + +/* Vector registers. */ +#define v0 0 +#define v1 1 +#define v2 2 +#define v3 3 +#define v4 4 +#define v5 5 +#define v6 6 +#define v7 7 +#define v8 8 +#define v9 9 +#define v10 10 +#define v11 11 +#define v12 12 +#define v13 13 +#define v14 14 +#define v15 15 +#define v16 16 +#define v17 17 +#define v18 18 +#define v19 19 +#define v20 20 +#define v21 21 +#define v22 22 +#define v23 23 +#define v24 24 +#define v25 25 +#define v26 26 +#define v27 27 +#define v28 28 +#define v29 29 +#define v30 30 +#define v31 31 + +#define VRSAVE 256 + +/* The 32-bit words of a 64-bit dword are at these offsets in memory. 
*/ +#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# define LOWORD 0 +# define HIWORD 4 +#else +# define LOWORD 4 +# define HIWORD 0 +#endif + +/* The high 16-bit word of a 64-bit dword is at this offset in memory. */ +#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +# define HISHORT 6 +#else +# define HISHORT 0 +#endif + +/* This seems to always be the case on PPC. */ +#define ALIGNARG(log2) log2 +#define ASM_SIZE_DIRECTIVE(name) .size name,.-name + +#else + +/* Linux kernel powerpc documentation [1] states issuing a syscall inside a + transaction is not recommended and may lead to undefined behavior. It + also states syscalls do not abort transactions. To avoid such traps, + we abort transaction just before syscalls. + + [1] Documentation/powerpc/transactional_memory.txt [Syscalls] */ +#if !IS_IN(rtld) && defined (ENABLE_LOCK_ELISION) +# define ABORT_TRANSACTION \ + ({ \ + if (THREAD_GET_TM_CAPABLE ()) \ + __libc_tabort (_ABORT_SYSCALL); \ + }) +#else +# define ABORT_TRANSACTION +#endif + +#endif /* __ASSEMBLER__ */ diff --git a/REORG.TODO/sysdeps/powerpc/test-arith.c b/REORG.TODO/sysdeps/powerpc/test-arith.c new file mode 100644 index 0000000000..aa1568d4d7 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/test-arith.c @@ -0,0 +1,604 @@ +/* Test floating-point arithmetic operations. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fenv.h> +#include <assert.h> + +#ifndef ESIZE +typedef double tocheck_t; +#define ESIZE 11 +#define MSIZE 52 +#define FUNC(x) x +#endif + +#define R_NEAREST 1 +#define R_ZERO 2 +#define R_UP 4 +#define R_DOWN 8 +#define R_ALL (R_NEAREST|R_ZERO|R_UP|R_DOWN) +static fenv_t rmodes[4]; +static const char * const rmnames[4] = +{ "nearest","zero","+Inf","-Inf" }; + +typedef union { + tocheck_t tc; + unsigned char c[sizeof(tocheck_t)]; +} union_t; + +/* Don't try reading these in a font that doesn't distinguish + O and zero. */ +typedef enum { + P_Z = 0x0, /* 00000...0 */ + P_000O = 0x1, /* 00011...1 */ + P_001Z = 0x2, /* 00100...0 */ + P_00O = 0x3, /* 00111...1 */ + P_01Z = 0x4, /* 01000...0 */ + P_010O = 0x5, /* 01011...1 */ + P_011Z = 0x6, /* 01100...0 */ + P_0O = 0x7, /* 01111...1 */ + P_1Z = 0x8, /* 10000...0 */ + P_100O = 0x9, /* 10011...1 */ + P_101Z = 0xa, /* 10100...0 */ + P_10O = 0xb, /* 10111...1 */ + P_11Z = 0xc, /* 11000...0 */ + P_110O = 0xd, /* 11011...1 */ + P_111Z = 0xe, /* 11100...0 */ + P_O = 0xf, /* 11111...1 */ + P_Z1 = 0x11, /* 000...001 */ + P_Z10 = 0x12, /* 000...010 */ + P_Z11 = 0x13, /* 000...011 */ + P_0O00 = 0x14, /* 011...100 */ + P_0O01 = 0x15, /* 011...101 */ + P_0O0 = 0x16, /* 011...110 */ + P_1Z1 = 0x19, /* 100...001 */ + P_1Z10 = 0x1a, /* 100...010 */ + P_1Z11 = 0x1b, /* 100...011 */ + P_O00 = 0x1c, /* 111...100 */ + P_O01 = 0x1d, /* 111...101 */ + P_O0 = 0x1e, /* 111...110 */ + P_R = 0x20, /* rrr...rrr */ /* ('r' means random. ) */ + P_Ro = 0x21, /* rrr...rrr, with odd parity. */ + P_0R = 0x22, /* 0rr...rrr */ + P_1R = 0x23, /* 1rr...rrr */ + P_Rno = 0x24, /* rrr...rrr, but not all ones. 
*/ +} pattern_t; + +static void +pattern_fill(pattern_t ptn, unsigned char *start, int bitoffset, int count) +{ +#define bitset(count, value) \ + start[(count)/8] = (start[(count)/8] & ~(1 << 7-(count)%8) \ + | (value) << 7-(count)%8) + int i; + + if (ptn >= 0 && ptn <= 0xf) + { + /* Patterns between 0 and 0xF have the following format: + The LSBit is used to fill the last n-3 bits of the pattern; + The next 3 bits are the first 3 bits of the pattern. */ + for (i = 0; i < count; i++) + if (i < 3) + bitset((bitoffset+i), ptn >> (3-i) & 1); + else + bitset((bitoffset+i), ptn >> 0 & 1); + } + else if (ptn <= 0x1f) + { + /* Patterns between 0x10 and 0x1F have the following format: + The two LSBits are the last two bits of the pattern; + The 0x8 bit is the first bit of the pattern; + The 0x4 bit is used to fill the remainder. */ + for (i = 0; i < count; i++) + if (i == 0) + bitset((bitoffset+i), ptn >> 3 & 1); + else if (i >= count-2) + bitset((bitoffset+i), ptn >> (count-1-i) & 1); + else + bitset((bitoffset+i), ptn >> 2 & 1); + } + else switch (ptn) + { + case P_0R: case P_1R: + assert(count > 0); + bitset(bitoffset, ptn & 1); + count--; + bitoffset++; + case P_R: + for (; count > 0; count--, bitoffset++) + bitset(bitoffset, rand() & 1); + break; + case P_Ro: + { + int op = 1; + assert(count > 0); + for (; count > 1; count--, bitoffset++) + bitset(bitoffset, op ^= (rand() & 1)); + bitset(bitoffset, op); + break; + } + case P_Rno: + { + int op = 1; + assert(count > 0); + for (; count > 1; count--, bitoffset++) + { + int r = rand() & 1; + op &= r; + bitset(bitoffset, r); + } + bitset(bitoffset, rand() & (op ^ 1)); + break; + } + + default: + assert(0); + } +#undef bitset +} + +static tocheck_t +pattern(int negative, pattern_t exp, pattern_t mant) +{ + union_t result; +#if 0 + int i; +#endif + + pattern_fill(negative ? 
P_O : P_Z, result.c, 0, 1); + pattern_fill(exp, result.c, 1, ESIZE); + pattern_fill(mant, result.c, ESIZE+1, MSIZE); +#if 0 + printf("neg=%d exp=%02x mant=%02x: ", negative, exp, mant); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", result.c[i]); + printf("\n"); +#endif + return result.tc; +} + +/* Return the closest different tocheck_t to 'x' in the direction of + 'direction', or 'x' if there is no such value. Assumes 'x' is not + a NaN. */ +static tocheck_t +delta(tocheck_t x, int direction) +{ + union_t xx; + int i; + + xx.tc = x; + if (xx.c[0] & 0x80) + direction = -direction; + if (direction == +1) + { + union_t tx; + tx.tc = pattern(xx.c[0] >> 7, P_O, P_Z); + if (memcmp(tx.c, xx.c, sizeof(tocheck_t)) == 0) + return x; + } + for (i = sizeof(tocheck_t)-1; i > 0; i--) + { + xx.c[i] += direction; + if (xx.c[i] != (direction > 0 ? 0 : 0xff)) + return xx.tc; + } + if (direction < 0 && (xx.c[0] & 0x7f) == 0) + return pattern(~(xx.c[0] >> 7) & 1, P_Z, P_Z1); + else + { + xx.c[0] += direction; + return xx.tc; + } +} + +static int nerrors = 0; + +#ifdef FE_ALL_INVALID +static const int all_exceptions = FE_ALL_INVALID | FE_ALL_EXCEPT; +#else +static const int all_exceptions = FE_ALL_EXCEPT; +#endif + +static void +check_result(int line, const char *rm, tocheck_t expected, tocheck_t actual) +{ + if (memcmp(&expected, &actual, sizeof(tocheck_t)) != 0) + { + unsigned char *ex, *ac; + size_t i; + + printf("%s:%d:round %s:result failed\n" + " expected result 0x", __FILE__, line, rm); + ex = (unsigned char *)&expected; + ac = (unsigned char *)&actual; + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", ex[i]); + printf(" got 0x"); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", ac[i]); + printf("\n"); + nerrors++; + } +} + +static const struct { + int except; + const char *name; +} excepts[] = { +#define except_entry(ex) { ex, #ex } , +#ifdef FE_INEXACT + except_entry(FE_INEXACT) +#else +# define FE_INEXACT 0 +#endif +#ifdef FE_DIVBYZERO + 
except_entry(FE_DIVBYZERO) +#else +# define FE_DIVBYZERO 0 +#endif +#ifdef FE_UNDERFLOW + except_entry(FE_UNDERFLOW) +#else +# define FE_UNDERFLOW 0 +#endif +#ifdef FE_OVERFLOW + except_entry(FE_OVERFLOW) +#else +# define FE_OVERFLOW 0 +#endif +#ifdef FE_INVALID + except_entry(FE_INVALID) +#else +# define FE_INVALID 0 +#endif +#ifdef FE_INVALID_SNAN + except_entry(FE_INVALID_SNAN) +#else +# define FE_INVALID_SNAN FE_INVALID +#endif +#ifdef FE_INVALID_ISI + except_entry(FE_INVALID_ISI) +#else +# define FE_INVALID_ISI FE_INVALID +#endif +#ifdef FE_INVALID_IDI + except_entry(FE_INVALID_IDI) +#else +# define FE_INVALID_IDI FE_INVALID +#endif +#ifdef FE_INVALID_ZDZ + except_entry(FE_INVALID_ZDZ) +#else +# define FE_INVALID_ZDZ FE_INVALID +#endif +#ifdef FE_INVALID_COMPARE + except_entry(FE_INVALID_COMPARE) +#else +# define FE_INVALID_COMPARE FE_INVALID +#endif +#ifdef FE_INVALID_SOFTWARE + except_entry(FE_INVALID_SOFTWARE) +#else +# define FE_INVALID_SOFTWARE FE_INVALID +#endif +#ifdef FE_INVALID_SQRT + except_entry(FE_INVALID_SQRT) +#else +# define FE_INVALID_SQRT FE_INVALID +#endif +#ifdef FE_INVALID_INTEGER_CONVERSION + except_entry(FE_INVALID_INTEGER_CONVERSION) +#else +# define FE_INVALID_INTEGER_CONVERSION FE_INVALID +#endif +}; + +static int excepts_missing = 0; + +static void +check_excepts(int line, const char *rm, int expected, int actual) +{ + if (expected & excepts_missing) + expected = expected & ~excepts_missing | FE_INVALID_SNAN; + if ((expected & all_exceptions) != actual) + { + size_t i; + printf("%s:%d:round %s:exceptions failed\n" + " expected exceptions ", __FILE__, line,rm); + for (i = 0; i < sizeof(excepts)/sizeof(excepts[0]); i++) + if (expected & excepts[i].except) + printf("%s ",excepts[i].name); + if ((expected & all_exceptions) == 0) + printf("- "); + printf("got"); + for (i = 0; i < sizeof(excepts)/sizeof(excepts[0]); i++) + if (actual & excepts[i].except) + printf(" %s",excepts[i].name); + if ((actual & all_exceptions) == 0) + printf("- "); 
+ printf(".\n"); + nerrors++; + } +} + +typedef enum { + B_ADD, B_SUB, B_MUL, B_DIV, B_NEG, B_ABS, B_SQRT +} op_t; +typedef struct { + int line; + op_t op; + int a_sgn; + pattern_t a_exp, a_mant; + int b_sgn; + pattern_t b_exp, b_mant; + int rmode; + int excepts; + int x_sgn; + pattern_t x_exp, x_mant; +} optest_t; +static const optest_t optests[] = { + /* Additions of zero. */ + {__LINE__,B_ADD, 0,P_Z,P_Z, 0,P_Z,P_Z, R_ALL,0, 0,P_Z,P_Z }, + {__LINE__,B_ADD, 1,P_Z,P_Z, 0,P_Z,P_Z, R_ALL & ~R_DOWN,0, 0,P_Z,P_Z }, + {__LINE__,B_ADD, 1,P_Z,P_Z, 0,P_Z,P_Z, R_DOWN,0, 1,P_Z,P_Z }, + {__LINE__,B_ADD, 1,P_Z,P_Z, 1,P_Z,P_Z, R_ALL,0, 1,P_Z,P_Z }, + + /* Additions with NaN. */ + {__LINE__,B_ADD, 0,P_O,P_101Z, 0,P_Z,P_Z, R_ALL,0, 0,P_O,P_101Z }, + {__LINE__,B_ADD, 0,P_O,P_01Z, 0,P_Z,P_Z, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_11Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_O,P_0O, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_O }, + {__LINE__,B_ADD, 0,P_Z,P_Z, 0,P_O,P_11Z, R_ALL,0, 0,P_O,P_11Z }, + {__LINE__,B_ADD, 0,P_O,P_001Z, 0,P_O,P_001Z, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_101Z }, + {__LINE__,B_ADD, 0,P_O,P_1Z, 0,P_Z,P_Z, R_ALL,0, 0,P_O,P_1Z }, + {__LINE__,B_ADD, 0,P_0O,P_Z, 0,P_O,P_10O, R_ALL,0, 0,P_O,P_10O }, + + /* Additions with infinity. 
*/ + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_Z,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 1,P_Z,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 0,P_Z,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 1,P_Z,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_O,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 1,P_O,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 1,P_O,P_Z, R_ALL, + FE_INVALID | FE_INVALID_ISI, 0,P_O,P_1Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 0,P_O,P_Z, R_ALL, + FE_INVALID | FE_INVALID_ISI, 0,P_O,P_1Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 0,P_0O,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 0,P_0O,P_Z, R_ALL,0, 1,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O,P_Z, 1,P_0O,P_Z, R_ALL,0, 0,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O,P_Z, 1,P_0O,P_Z, R_ALL,0, 1,P_O,P_Z }, + + /* Overflow (and zero). */ + {__LINE__,B_ADD, 0,P_O0,P_Z, 0,P_O0,P_Z, R_NEAREST | R_UP, + FE_INEXACT | FE_OVERFLOW, 0,P_O,P_Z }, + {__LINE__,B_ADD, 0,P_O0,P_Z, 0,P_O0,P_Z, R_ZERO | R_DOWN, + FE_INEXACT | FE_OVERFLOW, 0,P_O0,P_O }, + {__LINE__,B_ADD, 1,P_O0,P_Z, 1,P_O0,P_Z, R_NEAREST | R_DOWN, + FE_INEXACT | FE_OVERFLOW, 1,P_O,P_Z }, + {__LINE__,B_ADD, 1,P_O0,P_Z, 1,P_O0,P_Z, R_ZERO | R_UP, + FE_INEXACT | FE_OVERFLOW, 1,P_O0,P_O }, + {__LINE__,B_ADD, 0,P_O0,P_Z, 1,P_O0,P_Z, R_ALL & ~R_DOWN, + 0, 0,P_Z,P_Z }, + {__LINE__,B_ADD, 0,P_O0,P_Z, 1,P_O0,P_Z, R_DOWN, + 0, 1,P_Z,P_Z }, + + /* Negation. 
*/ + {__LINE__,B_NEG, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 1,P_Z,P_Z }, + {__LINE__,B_NEG, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_NEG, 0,P_O,P_Z, 0,0,0, R_ALL, 0, 1,P_O,P_Z }, + {__LINE__,B_NEG, 1,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_NEG, 0,P_O,P_1Z, 0,0,0, R_ALL, 0, 1,P_O,P_1Z }, + {__LINE__,B_NEG, 1,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_NEG, 0,P_O,P_01Z, 0,0,0, R_ALL, 0, 1,P_O,P_01Z }, + {__LINE__,B_NEG, 1,P_O,P_01Z, 0,0,0, R_ALL, 0, 0,P_O,P_01Z }, + {__LINE__,B_NEG, 0,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 1,P_1Z,P_1Z1 }, + {__LINE__,B_NEG, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, + {__LINE__,B_NEG, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 1,P_Z,P_Z1 }, + {__LINE__,B_NEG, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, + + /* Absolute value. */ + {__LINE__,B_ABS, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_ABS, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_ABS, 0,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_ABS, 1,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_ABS, 0,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_ABS, 1,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_ABS, 0,P_O,P_01Z, 0,0,0, R_ALL, 0, 0,P_O,P_01Z }, + {__LINE__,B_ABS, 1,P_O,P_01Z, 0,0,0, R_ALL, 0, 0,P_O,P_01Z }, + {__LINE__,B_ABS, 0,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, + {__LINE__,B_ABS, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, 0, 0,P_1Z,P_1Z1 }, + {__LINE__,B_ABS, 0,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, + {__LINE__,B_ABS, 1,P_Z,P_Z1, 0,0,0, R_ALL, 0, 0,P_Z,P_Z1 }, + + /* Square root. 
*/ + {__LINE__,B_SQRT, 0,P_Z,P_Z, 0,0,0, R_ALL, 0, 0,P_Z,P_Z }, + {__LINE__,B_SQRT, 1,P_Z,P_Z, 0,0,0, R_ALL, 0, 1,P_Z,P_Z }, + {__LINE__,B_SQRT, 0,P_O,P_1Z, 0,0,0, R_ALL, 0, 0,P_O,P_1Z }, + {__LINE__,B_SQRT, 1,P_O,P_1Z, 0,0,0, R_ALL, 0, 1,P_O,P_1Z }, + {__LINE__,B_SQRT, 0,P_O,P_01Z, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 0,P_O,P_11Z }, + {__LINE__,B_SQRT, 1,P_O,P_01Z, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SNAN, 1,P_O,P_11Z }, + + {__LINE__,B_SQRT, 0,P_O,P_Z, 0,0,0, R_ALL, 0, 0,P_O,P_Z }, + {__LINE__,B_SQRT, 0,P_0O,P_Z, 0,0,0, R_ALL, 0, 0,P_0O,P_Z }, + + {__LINE__,B_SQRT, 1,P_O,P_Z, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SQRT, 0,P_O,P_1Z }, + {__LINE__,B_SQRT, 1,P_1Z,P_1Z1, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SQRT, 0,P_O,P_1Z }, + {__LINE__,B_SQRT, 1,P_Z,P_Z1, 0,0,0, R_ALL, + FE_INVALID | FE_INVALID_SQRT, 0,P_O,P_1Z }, + +}; + +static void +check_op(void) +{ + size_t i; + int j; + tocheck_t r, a, b, x; + int raised; + + for (i = 0; i < sizeof(optests)/sizeof(optests[0]); i++) + { + a = pattern(optests[i].a_sgn, optests[i].a_exp, + optests[i].a_mant); + b = pattern(optests[i].b_sgn, optests[i].b_exp, + optests[i].b_mant); + x = pattern(optests[i].x_sgn, optests[i].x_exp, + optests[i].x_mant); + for (j = 0; j < 4; j++) + if (optests[i].rmode & 1<<j) + { + fesetenv(rmodes+j); + switch (optests[i].op) + { + case B_ADD: r = a + b; break; + case B_SUB: r = a - b; break; + case B_MUL: r = a * b; break; + case B_DIV: r = a / b; break; + case B_NEG: r = -a; break; + case B_ABS: r = FUNC(fabs)(a); break; + case B_SQRT: r = FUNC(sqrt)(a); break; + } + raised = fetestexcept(all_exceptions); + check_result(optests[i].line,rmnames[j],x,r); + check_excepts(optests[i].line,rmnames[j], + optests[i].excepts,raised); + } + } +} + +static void +fail_xr(int line, const char *rm, tocheck_t x, tocheck_t r, tocheck_t xx, + int xflag) +{ + size_t i; + unsigned char *cx, *cr, *cxx; + + printf("%s:%d:round %s:fail\n with x=0x", __FILE__, line,rm); + cx = (unsigned char *)&x; + 
cr = (unsigned char *)&r; + cxx = (unsigned char *)&xx; + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", cx[i]); + printf(" r=0x"); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", cr[i]); + printf(" xx=0x"); + for (i = 0; i < sizeof(tocheck_t); i++) + printf("%02x", cxx[i]); + printf(" inexact=%d\n", xflag != 0); + nerrors++; +} + +static void +check_sqrt(tocheck_t a) +{ + int j; + tocheck_t r0, r1, r2, x0, x1, x2; + int raised = 0; + int ok; + + for (j = 0; j < 4; j++) + { + int excepts; + + fesetenv(rmodes+j); + r1 = FUNC(sqrt)(a); + excepts = fetestexcept(all_exceptions); + fesetenv(FE_DFL_ENV); + raised |= excepts & ~FE_INEXACT; + x1 = r1 * r1 - a; + if (excepts & FE_INEXACT) + { + r0 = delta(r1,-1); r2 = delta(r1,1); + switch (1 << j) + { + case R_NEAREST: + x0 = r0 * r0 - a; x2 = r2 * r2 - a; + ok = fabs(x0) >= fabs(x1) && fabs(x1) <= fabs(x2); + break; + case R_ZERO: case R_DOWN: + x2 = r2 * r2 - a; + ok = x1 <= 0 && x2 >= 0; + break; + case R_UP: + x0 = r0 * r0 - a; + ok = x1 >= 0 && x0 <= 0; + break; + default: + assert(0); + } + } + else + ok = x1 == 0; + if (!ok) + fail_xr(__LINE__,rmnames[j],a,r1,x1,excepts&FE_INEXACT); + } + check_excepts(__LINE__,"all",0,raised); +} + +int main(int argc, char **argv) +{ + int i; + + _LIB_VERSION = _IEEE_; + + /* Set up environments for rounding modes. */ + fesetenv(FE_DFL_ENV); + fesetround(FE_TONEAREST); + fegetenv(rmodes+0); + fesetround(FE_TOWARDZERO); + fegetenv(rmodes+1); + fesetround(FE_UPWARD); + fegetenv(rmodes+2); + fesetround(FE_DOWNWARD); + fegetenv(rmodes+3); + +#if defined(FE_INVALID_SOFTWARE) || defined(FE_INVALID_SQRT) + /* There's this really stupid feature of the 601... 
/* Check __ppc_get_hwcap() and __ppc_get_at_platform() functionality.
/* Check __ppc_get_hwcap() and __ppc_get_at_platform() functionality.
/* hwcap contains only the latest supported ISA; the code checks which
   one it is and fills in the previously supported ones.  This is
   necessary because the same is done in hwcapinfo.c when setting the
   values that are copied to the TCB.  */
Should be %" + PRIx64 " for thread %ld.\n", tcb_hwcap, hwcap, tid); + return 1; + } + + /* Same test for the platform number. */ + __asm__ ("lwz %0,%1(%2)\n" + : "=r" (tcb_at_platform) + : "i" (__ATPLATOFF), "b" (__tp)); + + at_platform_string = (const char *) getauxval (AT_PLATFORM); + at_platform = _dl_string_platform (at_platform_string); + + if ( tcb_at_platform != at_platform ) + { + printf ("FAIL: __ppc_get_at_platform() - AT_PLATFORM is %x. Should be %x" + " for thread %ld\n", tcb_at_platform, at_platform, tid); + return 1; + } + + return 0; +} + +void *t1 (void *tid) +{ + if (check_tcbhwcap ((long) tid)) + { + pthread_exit (tid); + } + + pthread_exit (NULL); + +} + +static int +do_test (void) +{ + + pthread_t threads[2]; + pthread_attr_t attr; + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_JOINABLE); + + long i = 0; + + /* Check for main. */ + if (check_tcbhwcap (i)) + { + return 1; + } + + /* Check for other thread. */ + i++; + threads[i] = xpthread_create (&attr, t1, (void *)i); + + pthread_attr_destroy (&attr); + TEST_VERIFY_EXIT (xpthread_join (threads[i]) == NULL); + + printf("PASS: HWCAP, HWCAP2 and AT_PLATFORM are correctly set in the TCB for" + " all threads.\n"); + + pthread_exit (NULL); + +} + +#include <support/test-driver.c> diff --git a/REORG.TODO/sysdeps/powerpc/test-gettimebase.c b/REORG.TODO/sysdeps/powerpc/test-gettimebase.c new file mode 100644 index 0000000000..0e8e2f00fc --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/test-gettimebase.c @@ -0,0 +1,46 @@ +/* Check __ppc_get_timebase() for architecture changes + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Test if __ppc_get_timebase() is compatible with the current processor and if + it's changing between reads. A read failure might indicate a Power ISA or + binutils change. */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdint.h> + +#include <sys/platform/ppc.h> + +static int +do_test (void) +{ + uint64_t t1, t2, t3; + t1 = __ppc_get_timebase (); + printf ("Time Base = %"PRIu64"\n", t1); + t2 = __ppc_get_timebase (); + printf ("Time Base = %"PRIu64"\n", t2); + t3 = __ppc_get_timebase (); + printf ("Time Base = %"PRIu64"\n", t3); + if (t1 != t2 && t1 != t3 && t2 != t3) + return 0; + + printf ("Fail: timebase reads should always be different.\n"); + return 1; +} + +#include <support/test-driver.c> diff --git a/REORG.TODO/sysdeps/powerpc/tls-macros.h b/REORG.TODO/sysdeps/powerpc/tls-macros.h new file mode 100644 index 0000000000..809ef5cea1 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tls-macros.h @@ -0,0 +1,3 @@ +#define __TLS_CALL_CLOBBERS \ + "0", "4", "5", "6", "7", "8", "9", "10", "11", "12", \ + "lr", "ctr", "cr0", "cr1", "cr5", "cr6", "cr7" diff --git a/REORG.TODO/sysdeps/powerpc/tst-set_ppr.c b/REORG.TODO/sysdeps/powerpc/tst-set_ppr.c new file mode 100644 index 0000000000..df8dda78e8 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tst-set_ppr.c @@ -0,0 +1,102 @@ +/* Test the implementation of __ppc_set_ppr_* functions. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdint.h> +#include <sys/auxv.h> + +#include <sys/platform/ppc.h> + +#include <support/test-driver.h> + +#ifdef __powerpc64__ + typedef uint64_t ppr_t; +# define MFPPR "mfppr" + /* The thread priority value is obtained from bits 11:13. */ +# define EXTRACT_THREAD_PRIORITY(x) ((x >> 50) & 7) +#else + typedef uint32_t ppr_t; +# define MFPPR "mfppr32" + /* For 32-bit, the upper 32 bits of the Program Priority Register (PPR) + are used, so the thread priority value is obtained from bits 43:46. */ +# define EXTRACT_THREAD_PRIORITY(x) ((x >> 18) & 7) +#endif /* !__powerpc64__ */ + +/* Read the thread priority value set in the PPR. */ +static __inline__ ppr_t +get_thread_priority (void) +{ + /* Read the PPR. */ + ppr_t ppr; + asm volatile (MFPPR" %0" : "=r"(ppr)); + /* Return the thread priority value. */ + return EXTRACT_THREAD_PRIORITY (ppr); +} + +/* Check the thread priority bits of PPR are set as expected. 
*/ +uint8_t +check_thread_priority (uint8_t expected) +{ + ppr_t actual = get_thread_priority (); + + if (actual != expected) + { + printf ("FAIL: Expected %"PRIu8" got %"PRIuMAX".\n", expected, + (uintmax_t) actual); + return 1; + } + printf ("PASS: Thread priority set to %"PRIu8" correctly.\n", expected); + return 0; +} + +/* The Power ISA 2.06 allows the following thread priorities for any + problem state program: low (2), medium low (3), and medium (4). + Power ISA 2.07b added very low (1). + Check whether the values set by __ppc_set_ppr_* are correct. */ +static int +do_test (void) +{ + /* Check for the minimum required Power ISA to run these tests. */ + if ((getauxval (AT_HWCAP) & PPC_FEATURE_ARCH_2_06) == 0) + { + printf ("Requires an environment that implements the Power ISA version" + " 2.06 or greater.\n"); + return EXIT_UNSUPPORTED; + } + + uint8_t rc = 0; + +#ifdef _ARCH_PWR8 + __ppc_set_ppr_very_low (); + rc |= check_thread_priority (1); +#endif /* _ARCH_PWR8 */ + + __ppc_set_ppr_low (); + rc |= check_thread_priority (2); + + __ppc_set_ppr_med_low (); + rc |= check_thread_priority (3); + + __ppc_set_ppr_med (); + rc |= check_thread_priority (4); + + return rc; +} + +#include <support/test-driver.c> diff --git a/REORG.TODO/sysdeps/powerpc/tst-stack-align.h b/REORG.TODO/sysdeps/powerpc/tst-stack-align.h new file mode 100644 index 0000000000..c01d0793ff --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tst-stack-align.h @@ -0,0 +1,46 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <stdint.h> + +#define TEST_STACK_ALIGN() \ + ({ \ + /* Altivec __vector int etc. needs 16byte aligned stack. \ + Instead of using altivec.h here, use aligned attribute instead. */ \ + struct _S \ + { \ + int _i __attribute__((aligned (16))); \ + int _j[3]; \ + } _s = { ._i = 18, ._j[0] = 19, ._j[1] = 20, ._j[2] = 21 }; \ + double _d = 12.0; \ + long double _ld = 15.0; \ + int _ret = 0; \ + printf ("__vector int: { %d, %d, %d, %d } %p %zu\n", _s._i, _s._j[0], \ + _s._j[1], _s._j[2], &_s, __alignof (_s)); \ + if ((((uintptr_t) &_s) & (__alignof (_s) - 1)) != 0) \ + _ret = 1; \ + \ + printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \ + if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \ + _ret = 1; \ + \ + printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \ + if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \ + _ret = 1; \ + _ret; \ + }) diff --git a/REORG.TODO/sysdeps/powerpc/tst-tlsopt-powerpc.c b/REORG.TODO/sysdeps/powerpc/tst-tlsopt-powerpc.c new file mode 100644 index 0000000000..8ae928a3f4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/tst-tlsopt-powerpc.c @@ -0,0 +1,51 @@ +/* glibc test for __tls_get_addr optimization. */ +#include <stdio.h> + +#include "../../elf/tls-macros.h" +#include "dl-tls.h" + +/* common 'int' variable in TLS. */ +COMMON_INT_DEF(foo); + + +static int +do_test (void) +{ + int result = 0; + + /* Get variable using general dynamic model. 
*/ + int *ap = TLS_GD (foo); + if (*ap != 0) + { + printf ("foo = %d\n", *ap); + result = 1; + } + + tls_index *tls_arg; +#ifdef __powerpc64__ + register unsigned long thread_pointer __asm__ ("r13"); + asm ("addi %0,2,foo@got@tlsgd" : "=r" (tls_arg)); +#else + register unsigned long thread_pointer __asm__ ("r2"); + asm ("bcl 20,31,1f\n1:\t" + "mflr %0\n\t" + "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" + "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" + "addi %0,%0,foo@got@tlsgd" : "=b" (tls_arg)); +#endif + + if (tls_arg->ti_module != 0) + { + printf ("tls_index not optimized, binutils too old?\n"); + result = 1; + } + else if (tls_arg->ti_offset + thread_pointer != (unsigned long) ap) + { + printf ("tls_index->ti_offset wrong value\n"); + result = 1; + } + + return result; +} + +#include <support/test-driver.c> |