diff options
author | Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> | 2021-04-30 18:12:08 -0300 |
---|---|---|
committer | Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> | 2021-04-30 18:12:08 -0300 |
commit | e941e0ae80626b7661c1db8953a673cafd3b8b19 (patch) | |
tree | 42b3dcccfce69af0f7ffb0fa4ed2ed75734b82a2 /sysdeps/powerpc/powerpc64/multiarch | |
parent | dd59655e9371af86043b97e38953f43bd9496699 (diff) | |
download | glibc-e941e0ae80626b7661c1db8953a673cafd3b8b19.tar.gz |
powerpc64le: Optimize memcpy for POWER10
This implementation is based on __memcpy_power8_cached and integrates
suggestions from Anton Blanchard.
It benefits from loads and stores with length for short lengths and for
tail code, simplifying the code.
All unaligned memory accesses use instructions that do not generate
alignment interrupts on POWER10, making it safe to use on
caching-inhibited memory.
The main loop has also been modified in order to increase instruction
throughput by reducing the dependency on updates from previous iterations.
On average, this implementation provides around 30% improvement when
compared to __memcpy_power7 and 10% improvement in comparison to
__memcpy_power8_cached.
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch')
4 files changed, 40 insertions, 1 deletion
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index a82219c490..9ef12d3563 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -32,7 +32,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
                    strncase-power8
 
 ifneq (,$(filter %le,$(config-machine)))
-sysdep_routines += memmove-power10 \
+sysdep_routines += memcpy-power10 memmove-power10 \
                    strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 \
                    rawmemchr-power9 strlen-power9 strncpy-power9 stpncpy-power9 \
                    strlen-power10
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index d00bcc8178..1ab56bb2c9 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -51,6 +51,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 #ifdef SHARED
   /* Support sysdeps/powerpc/powerpc64/multiarch/memcpy.c. */
   IFUNC_IMPL (i, name, memcpy,
+#ifdef __LITTLE_ENDIAN__
+              IFUNC_IMPL_ADD (array, i, memcpy,
+                              hwcap2 & PPC_FEATURE2_ARCH_3_1
+                              && hwcap & PPC_FEATURE_HAS_VSX,
+                              __memcpy_power10)
+#endif
               IFUNC_IMPL_ADD (array, i, memcpy, hwcap2 & PPC_FEATURE2_ARCH_2_07,
                               __memcpy_power8_cached)
               IFUNC_IMPL_ADD (array, i, memcpy, hwcap & PPC_FEATURE_HAS_VSX,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy-power10.S b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power10.S
new file mode 100644
index 0000000000..70e0fc3ed6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy-power10.S
@@ -0,0 +1,26 @@
+/* Optimized memcpy implementation for POWER10.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>. */
+
+#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
+#define MEMCPY __memcpy_power10
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/le/power10/memcpy.S>
+#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
index 5733192932..53ab32ef26 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memcpy.c
@@ -36,8 +36,15 @@ extern __typeof (__redirect_memcpy) __memcpy_power6 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_a2 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_power7 attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_power8_cached attribute_hidden;
+# if defined __LITTLE_ENDIAN__
+extern __typeof (__redirect_memcpy) __memcpy_power10 attribute_hidden;
+# endif
 
 libc_ifunc (__libc_memcpy,
+# if defined __LITTLE_ENDIAN__
+            (hwcap2 & PPC_FEATURE2_ARCH_3_1 && hwcap & PPC_FEATURE_HAS_VSX)
+            ? __memcpy_power10 :
+# endif
             ((hwcap2 & PPC_FEATURE2_ARCH_2_07) && use_cached_memopt)
             ? __memcpy_power8_cached :
               (hwcap & PPC_FEATURE_HAS_VSX)