diff options
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S | 18 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S | 28 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c | 21 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c | 30 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S | 28 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c | 21 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/multiarch/strncase.c | 25 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 446 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strncase.S | 20 |
11 files changed, 598 insertions, 49 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile index 9ee9bc2044..e3ac285e00 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile @@ -21,7 +21,9 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \ strncpy-power8 strstr-power7 strstr-ppc64 \ strspn-power8 strspn-ppc64 strcspn-power8 strcspn-ppc64 \ - strlen-power8 strcasestr-power8 strcasestr-ppc64 + strlen-power8 strcasestr-power8 strcasestr-ppc64 \ + strcasecmp-ppc64 strcasecmp-power8 strncase-ppc64 \ + strncase-power8 CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c index a0dc8ad4c4..9f6bd7cd98 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c @@ -204,6 +204,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */ IFUNC_IMPL (i, name, strcasecmp, IFUNC_IMPL_ADD (array, i, strcasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strcasecmp_power8) + IFUNC_IMPL_ADD (array, i, strcasecmp, hwcap & PPC_FEATURE_HAS_VSX, __strcasecmp_power7) IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) @@ -219,6 +222,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */ IFUNC_IMPL (i, name, strncasecmp, IFUNC_IMPL_ADD (array, i, strncasecmp, + hwcap2 & PPC_FEATURE2_ARCH_2_07, + __strncasecmp_power8) + IFUNC_IMPL_ADD (array, i, strncasecmp, hwcap & PPC_FEATURE_HAS_VSX, __strncasecmp_power7) IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S index 013dc62867..99cd7bd4df 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S @@ -1,4 +1,4 @@ -/* Optimized strcasecmp implementation foOWER7. +/* Optimized strcasecmp implementation for POWER7. Copyright (C) 2013-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -18,21 +18,7 @@ #include <sysdep.h> -#undef ENTRY -#define ENTRY(name) \ - .section ".text"; \ - ENTRY_2(__strcasecmp_power7) \ - .align ALIGNARG(2); \ - BODY_LABEL(__strcasecmp_power7): \ - cfi_startproc; \ - LOCALENTRY(__strcasecmp_power7) - -#undef END -#define END(name) \ - cfi_endproc; \ - TRACEBACK(__strcasecmp_power7) \ - END_2(__strcasecmp_power7) - +#define __strcasecmp __strcasecmp_power7 #undef weak_alias #define weak_alias(name, alias) diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S new file mode 100644 index 0000000000..492047a603 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S @@ -0,0 +1,28 @@ +/* Optimized strcasecmp implementation for POWER8. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strcasecmp __strcasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c new file mode 100644 index 0000000000..6318b4a428 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strcasecmp for PPC64. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strcasecmp __strcasecmp_ppc + +#include <string/strcasecmp.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c index 1f22336d49..5ec6885dfc 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c @@ -1,4 +1,4 @@ -/* Multiple versions of strcasecmp. +/* Multiple versions of strcasecmp Copyright (C) 2013-2016 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,25 +16,21 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#if IS_IN (libc) -# include <string.h> -# define strcasecmp __strcasecmp_ppc -extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; -extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; -#endif +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" -#include <string/strcasecmp.c> -#undef strcasecmp +extern __typeof (__strcasecmp) __libc_strcasecmp; -#if IS_IN (libc) -# include <shlib-compat.h> -# include "init-arch.h" +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; +extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden; -extern __typeof (__strcasecmp) __libc_strcasecmp; libc_ifunc (__libc_strcasecmp, - (hwcap & PPC_FEATURE_HAS_VSX) - ? __strcasecmp_power7 - : __strcasecmp_ppc); + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strcasecmp_power8: + (hwcap & PPC_FEATURE_HAS_VSX) + ? __strcasecmp_power7 + : __strcasecmp_ppc); weak_alias (__libc_strcasecmp, strcasecmp) -#endif diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S new file mode 100644 index 0000000000..01a63b5ba4 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S @@ -0,0 +1,28 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define __strncasecmp __strncasecmp_power8 +#undef weak_alias +#define weak_alias(name, alias) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#include <sysdeps/powerpc/powerpc64/power8/strncase.S> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c new file mode 100644 index 0000000000..c245d775b9 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c @@ -0,0 +1,21 @@ +/* Multiarch strncasecmp for PPC64. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define strncasecmp __strncasecmp_ppc + +#include <string/strncase.c> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c index 2729fcea83..0bfb25c381 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncase.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncase.c @@ -16,26 +16,21 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#if IS_IN (libc) -# include <string.h> -# define strncasecmp __strncasecmp_ppc -extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; -#endif +#include <string.h> +#include <shlib-compat.h> +#include "init-arch.h" -#include <string/strncase.c> -#undef strncasecmp +extern __typeof (__strncasecmp) __libc_strncasecmp; -#if IS_IN (libc) -# include <shlib-compat.h> -# include "init-arch.h" +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; +extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle - ifunc symbol properly. */ -extern __typeof (__strncasecmp) __libc_strncasecmp; libc_ifunc (__libc_strncasecmp, + (hwcap2 & PPC_FEATURE2_ARCH_2_07) + ? __strncasecmp_power8: (hwcap & PPC_FEATURE_HAS_VSX) ? __strncasecmp_power7 : __strncasecmp_ppc); + weak_alias (__libc_strncasecmp, strncasecmp) -#endif diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S new file mode 100644 index 0000000000..63f62171d9 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S @@ -0,0 +1,446 @@ +/* Optimized strcasecmp implementation for PowerPC64. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <locale-defines.h> + +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ + +#ifndef USE_AS_STRNCASECMP +# define __STRCASECMP __strcasecmp +# define STRCASECMP strcasecmp +#else +# define __STRCASECMP __strncasecmp +# define STRCASECMP strncasecmp +#endif +/* Convert 16 bytes to lowercase and compare */ +#define TOLOWER() \ + vaddubm v8, v4, v1; \ + vaddubm v7, v4, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v4, v7, v4, v8; \ + vaddubm v8, v5, v1; \ + vaddubm v7, v5, v3; \ + vcmpgtub v8, v8, v2; \ + vsel v5, v7, v5, v8; \ + vcmpequb. v7, v5, v4; + +/* Get 16 bytes for unaligned case. */ +#ifdef __LITTLE_ENDIAN__ +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vcmpequb. v8, v0, reg1; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, v9, reg1, reg3; +#else +#define GET16BYTES(reg1, reg2, reg3) \ + lvx reg1, 0, reg2; \ + vcmpequb. v8, v0, reg1; \ + beq cr6, 1f; \ + vspltisb v9, 0; \ + b 2f; \ + .align 4; \ +1: \ + addi r6, reg2, 16; \ + lvx v9, 0, r6; \ +2: \ + vperm reg1, reg1, v9, reg3; +#endif + +/* Check null in v4, v5 and convert to lower. */ +#define CHECKNULLANDCONVERT() \ + vcmpequb. v7, v0, v5; \ + beq cr6, 3f; \ + vcmpequb. v7, v0, v4; \ + beq cr6, 3f; \ + b L(null_found); \ + .align 4; \ +3: \ + TOLOWER() + +#ifdef _ARCH_PWR8 +# define VCLZD_V8_v7 vclzd v8, v7; +# define MFVRD_R3_V1 mfvrd r3, v1; +# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; +# define VPOPCNTD_V8_V8 vpopcntd v8, v8; +# define VADDUQM_V7_V8 vadduqm v9, v7, v8; +#else +# define VCLZD_V8_v7 .long 0x11003fc2 +# define MFVRD_R3_V1 .long 0x7c230067 +# define VSUBUDM_V9_V8 .long 0x112944c0 +# define VPOPCNTD_V8_V8 .long 0x110047c3 +# define VADDUQM_V7_V8 .long 0x11274100 +#endif + + .machine power7 + +ENTRY (__STRCASECMP) +#ifdef USE_AS_STRNCASECMP + CALL_MCOUNT 3 +#else + CALL_MCOUNT 2 +#endif +#define rRTN r3 /* Return value */ +#define rSTR1 r10 /* 1st string */ +#define rSTR2 r4 /* 2nd string */ +#define rCHAR1 r6 /* Byte read from 1st string */ +#define rCHAR2 r7 /* Byte read from 2nd string */ +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ +#define rLWR1 r8 /* Word tolower(rCHAR1) */ +#define rLWR2 r12 /* Word tolower(rCHAR2) */ +#define rTMP r9 +#define rLOC r11 /* Default locale address */ + + cmpd cr7, rRTN, rSTR2 + + /* Get locale address. */ + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + add rLOC, rTMP, __libc_tsd_LOCALE@tls + ld rLOC, 0(rLOC) + + mr rSTR1, rRTN + li rRTN, 0 + beqlr cr7 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) +#endif + vspltisb v0, 0 + vspltisb v8, -1 + /* Check for null in initial characters. + Check max of 16 char depending on the alignment. + If null is present, proceed byte by byte. */ + lvx v4, 0, rSTR1 +#ifdef __LITTLE_ENDIAN__ + lvsr v10, 0, rSTR1 /* Compute mask. */ + vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ +#else + lvsl v10, 0, rSTR1 + vperm v9, v4, v8, v10 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + lvx v5, 0, rSTR2 + /* Calculate alignment. */ +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 + vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ +#else + lvsl v6, 0, rSTR2 + vperm v9, v5, v8, v6 +#endif + vcmpequb. v9, v0, v9 /* Check for null bytes. */ + bne cr6, L(bytebybyte) + /* Check if locale has non ascii characters. */ + ld rTMP, 0(rLOC) + addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES + lwz rTMP, 0(r6) + cmpdi cr7, rTMP, 1 + beq cr7, L(bytebybyte) + + /* Load vector registers with values used for TOLOWER. */ + /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */ + vspltisb v3, 2 + vspltisb v9, 4 + vsl v3, v3, v9 + vaddubm v1, v3, v3 + vnor v1, v1, v1 + vspltisb v2, 7 + vsububm v2, v3, v2 + + andi. rADDR1, rSTR1, 0xF + beq cr0, L(align) + addi r6, rSTR1, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. */ +#ifdef __LITTLE_ENDIAN__ + vperm v4, v9, v4, v10 +#else + vperm v4, v4, v9, v10 +#endif +L(align): + andi. rADDR2, rSTR2, 0xF + beq cr0, L(align1) + addi r6, rSTR2, 16 + lvx v9, 0, r6 + /* Compute 16 bytes from previous two loads. */ +#ifdef __LITTLE_ENDIAN__ + vperm v5, v9, v5, v6 +#else + vperm v5, v5, v9, v6 +#endif +L(align1): + CHECKNULLANDCONVERT() + blt cr6, L(match) + b L(different) + .align 4 +L(match): + clrldi r6, rSTR1, 60 + subfic r7, r6, 16 +#ifdef USE_AS_STRNCASECMP + sub r5, r5, r7 +#endif + add rSTR1, rSTR1, r7 + add rSTR2, rSTR2, r7 + andi. rADDR2, rSTR2, 0xF + addi rSTR1, rSTR1, -16 + addi rSTR2, rSTR2, -16 + beq cr0, L(aligned) +#ifdef __LITTLE_ENDIAN__ + lvsr v6, 0, rSTR2 +#else + lvsl v6, 0, rSTR2 +#endif + /* There are 2 loops depending on the input alignment. + Each loop gets 16 bytes from s1 and s2, check for null, + convert to lowercase and compare. Loop till difference + or null occurs. */ +L(s1_align): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + GET16BYTES(v5, rSTR2, v6) + CHECKNULLANDCONVERT() + blt cr6, L(s1_align) + b L(different) + .align 4 +L(aligned): + addi rSTR1, rSTR1, 16 + addi rSTR2, rSTR2, 16 +#ifdef USE_AS_STRNCASECMP + cmpdi cr7, r5, 16 + blt cr7, L(bytebybyte) + addi r5, r5, -16 +#endif + lvx v4, 0, rSTR1 + lvx v5, 0, rSTR2 + CHECKNULLANDCONVERT() + blt cr6, L(aligned) + + /* Calculate and return the difference. */ +L(different): + vaddubm v1, v3, v3 + vcmpequb v7, v0, v7 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v1 + blt cr6, L(shift8) + vsro v8, v8, v1 +#endif + b L(skipsum) + .align 4 +L(shift8): + vsumsws v8, v8, v0 +L(skipsum): +#ifdef __LITTLE_ENDIAN__ + /* Shift registers based on leading zero count. */ + vsro v6, v5, v8 + vsro v7, v4, v8 + /* Merge and move to GPR. */ + vmrglb v6, v6, v7 + vslo v1, v6, v1 + MFVRD_R3_V1 + /* Place the characters that are different in first position. */ + sldi rSTR2, rRTN, 56 + srdi rSTR2, rSTR2, 56 + sldi rSTR1, rRTN, 48 + srdi rSTR1, rSTR1, 56 +#else + vslo v6, v5, v8 + vslo v7, v4, v8 + vmrghb v1, v6, v7 + MFVRD_R3_V1 + srdi rSTR2, rRTN, 48 + sldi rSTR2, rSTR2, 56 + srdi rSTR2, rSTR2, 56 + srdi rSTR1, rRTN, 56 +#endif + subf rRTN, rSTR1, rSTR2 + extsw rRTN, rRTN + blr + + .align 4 + /* OK. We've hit the end of the string. We need to be careful that + we don't compare two strings as different because of junk beyond + the end of the strings... */ +L(null_found): + vaddubm v10, v3, v3 +#ifdef __LITTLE_ENDIAN__ + /* Count trailing zero. */ + vspltisb v8, -1 + VADDUQM_V7_V8 + vandc v8, v9, v7 + VPOPCNTD_V8_V8 + vspltb v6, v8, 15 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) +#else + /* Count leading zero. */ + VCLZD_V8_v7 + vspltb v6, v8, 7 + vcmpequb. v6, v6, v10 + blt cr6, L(shift_8) + vsro v8, v8, v10 +#endif + b L(skipsum1) + .align 4 +L(shift_8): + vsumsws v8, v8, v0 +L(skipsum1): + /* Calculate shift count based on count of zero. */ + vspltisb v10, 7 + vslb v10, v10, v10 + vsldoi v9, v0, v10, 1 + VSUBUDM_V9_V8 + vspltisb v8, 8 + vsldoi v8, v0, v8, 1 + VSUBUDM_V9_V8 + /* Shift and remove junk after null character. */ +#ifdef __LITTLE_ENDIAN__ + vslo v5, v5, v9 + vslo v4, v4, v9 +#else + vsro v5, v5, v9 + vsro v4, v4, v9 +#endif + /* Convert and compare 16 bytes. */ + TOLOWER() + blt cr6, L(retnull) + b L(different) + .align 4 +L(retnull): + li rRTN, 0 + blr + .align 4 +L(bytebybyte): + /* Unrolling loop for POWER: loads are done with 'lbz' plus + offset and string descriptors are only updated in the end + of loop unrolling. */ + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + rldicl rTMP, r5, 62, 2 + cmpdi cr7, rTMP, 0 + beq cr7, L(lessthan4) + mtctr rTMP +#endif +L(loop): + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 != '\0') || (r == 1) */ + beq cr1, L(done) + lbz rCHAR1, 1(rSTR1) + lbz rCHAR2, 1(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 2(rSTR1) + lbz rCHAR2, 2(rSTR2) + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 3(rSTR1) + lbz rCHAR2, 3(rSTR2) + cmpdi rCHAR1, 0 + /* Increment both string descriptors */ + addi rSTR1, rSTR1, 4 + addi rSTR2, rSTR2, 4 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ +#ifdef USE_AS_STRNCASECMP + bdnz L(loop) +#else + b L(loop) +#endif +#ifdef USE_AS_STRNCASECMP +L(lessthan4): + clrldi r5, r5, 62 + cmpdi cr7, r5, 0 + beq cr7, L(retnull) + mtctr r5 +L(loop1): + cmpdi rCHAR1, 0 + sldi rADDR1, rCHAR1, 2 + sldi rADDR2, rCHAR2, 2 + lwzx rLWR1, rLOC, rADDR1 + lwzx rLWR2, rLOC, rADDR2 + cmpw cr1, rLWR1, rLWR2 + crorc 4*cr1+eq,eq,4*cr1+eq + beq cr1, L(done) + addi rSTR1, rSTR1, 1 + addi rSTR2, rSTR2, 1 + lbz rCHAR1, 0(rSTR1) + lbz rCHAR2, 0(rSTR2) + bdnz L(loop1) +#endif +L(done): + subf r0, rLWR2, rLWR1 + extsw rRTN, r0 + blr +END (__STRCASECMP) + +weak_alias (__STRCASECMP, STRCASECMP) +libc_hidden_builtin_def (__STRCASECMP) diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S new file mode 100644 index 0000000000..7ce2ed0968 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/power8/strncase.S @@ -0,0 +1,20 @@ +/* Optimized strncasecmp implementation for POWER8. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define USE_AS_STRNCASECMP 1 +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> |