diff options
Diffstat (limited to 'sysdeps/i386/i686')
167 files changed, 0 insertions, 38206 deletions
diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile deleted file mode 100644 index 311042787b..0000000000 --- a/sysdeps/i386/i686/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# So that we can test __m128's alignment -stack-align-test-flags += -msse - -CFLAGS-.o += -Wa,-mtune=i686 -CFLAGS-.os += -Wa,-mtune=i686 -CFLAGS-.op += -Wa,-mtune=i686 -CFLAGS-.oS += -Wa,-mtune=i686 - -ASFLAGS-.o += -Wa,-mtune=i686 -ASFLAGS-.os += -Wa,-mtune=i686 -ASFLAGS-.op += -Wa,-mtune=i686 -ASFLAGS-.oS += -Wa,-mtune=i686 diff --git a/sysdeps/i386/i686/add_n.S b/sysdeps/i386/i686/add_n.S deleted file mode 100644 index 4afa648ceb..0000000000 --- a/sysdeps/i386/i686/add_n.S +++ /dev/null @@ -1,110 +0,0 @@ -/* Add two limb vectors of the same length > 0 and store sum in a third - limb vector. - Copyright (C) 1992-2017 Free Software Foundation, Inc. - This file is part of the GNU MP Library. - - The GNU MP Library is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1 of the License, or (at your - option) any later version. - - The GNU MP Library is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with the GNU MP Library; see the file COPYING.LIB. If not, - see <http://www.gnu.org/licenses/>. 
*/ - -#include "sysdep.h" -#include "asm-syntax.h" - -#define PARMS 4+8 /* space for 2 saved regs */ -#define RES PARMS -#define S1 RES+4 -#define S2 S1+4 -#define SIZE S2+4 - - .text -#ifdef PIC -L(1): addl (%esp), %eax - ret -#endif -ENTRY (__mpn_add_n) - - pushl %edi - cfi_adjust_cfa_offset (4) - pushl %esi - cfi_adjust_cfa_offset (4) - - movl RES(%esp),%edi - cfi_rel_offset (edi, 4) - movl S1(%esp),%esi - cfi_rel_offset (esi, 0) - movl S2(%esp),%edx - movl SIZE(%esp),%ecx - movl %ecx,%eax - shrl $3,%ecx /* compute count for unrolled loop */ - negl %eax - andl $7,%eax /* get index where to start loop */ - jz L(oop) /* necessary special case for 0 */ - incl %ecx /* adjust loop count */ - shll $2,%eax /* adjustment for pointers... */ - subl %eax,%edi /* ... since they are offset ... */ - subl %eax,%esi /* ... by a constant when we ... */ - subl %eax,%edx /* ... enter the loop */ - shrl $2,%eax /* restore previous value */ -#ifdef PIC -/* Calculate start address in loop for PIC. */ - leal (L(oop)-L(0)-3)(%eax,%eax,8),%eax - call L(1) -L(0): -#else -/* Calculate start address in loop for non-PIC. 
*/ - leal (L(oop) - 3)(%eax,%eax,8),%eax -#endif - jmp *%eax /* jump into loop */ - ALIGN (3) -L(oop): movl (%esi),%eax - adcl (%edx),%eax - movl %eax,(%edi) - movl 4(%esi),%eax - adcl 4(%edx),%eax - movl %eax,4(%edi) - movl 8(%esi),%eax - adcl 8(%edx),%eax - movl %eax,8(%edi) - movl 12(%esi),%eax - adcl 12(%edx),%eax - movl %eax,12(%edi) - movl 16(%esi),%eax - adcl 16(%edx),%eax - movl %eax,16(%edi) - movl 20(%esi),%eax - adcl 20(%edx),%eax - movl %eax,20(%edi) - movl 24(%esi),%eax - adcl 24(%edx),%eax - movl %eax,24(%edi) - movl 28(%esi),%eax - adcl 28(%edx),%eax - movl %eax,28(%edi) - leal 32(%edi),%edi - leal 32(%esi),%esi - leal 32(%edx),%edx - decl %ecx - jnz L(oop) - - sbbl %eax,%eax - negl %eax - - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (__mpn_add_n) diff --git a/sysdeps/i386/i686/bcopy.S b/sysdeps/i386/i686/bcopy.S deleted file mode 100644 index 15ef9419a4..0000000000 --- a/sysdeps/i386/i686/bcopy.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BCOPY -#define memmove bcopy -#include <sysdeps/i386/i686/memmove.S> diff --git a/sysdeps/i386/i686/bzero.S b/sysdeps/i386/i686/bzero.S deleted file mode 100644 index c7898f18e0..0000000000 --- a/sysdeps/i386/i686/bzero.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include <sysdeps/i386/i686/memset.S> -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i686/dl-hash.h b/sysdeps/i386/i686/dl-hash.h deleted file mode 100644 index ceda785b32..0000000000 --- a/sysdeps/i386/i686/dl-hash.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Compute hash alue for given string according to ELF standard. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _DL_HASH_H -#define _DL_HASH_H 1 - - -/* This is the hashing function specified by the ELF ABI. It is highly - optimized for the PII processors. Though it will run on i586 it - would be much slower than the generic C implementation. So don't - use it. */ -static unsigned int -__attribute__ ((unused)) -_dl_elf_hash (const char *name) -{ - unsigned int result; - unsigned int temp0; - unsigned int temp1; - - __asm__ __volatile__ - ("movzbl (%1),%2\n\t" - "testl %2, %2\n\t" - "jz 1f\n\t" - "movl %2, %0\n\t" - "movzbl 1(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl %2, %0\n\t" - "movzbl 2(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl %2, %0\n\t" - "movzbl 3(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl %2, %0\n\t" - "movzbl 4(%1), %2\n\t" - "jecxz 1f\n\t" - "shll $4, %0\n\t" - "addl $5, %1\n\t" - "addl %2, %0\n\t" - "movzbl (%1), %2\n\t" - "jecxz 1f\n" - "2:\t" - "shll $4, %0\n\t" - "movl $0xf0000000, %3\n\t" - "incl %1\n\t" - "addl %2, %0\n\t" - "andl %0, %3\n\t" - "andl $0x0fffffff, %0\n\t" - "shrl $24, %3\n\t" - "movzbl (%1), %2\n\t" - "xorl %3, %0\n\t" - "testl %2, %2\n\t" - "jnz 2b\n" - "1:\t" - : "=&r" (result), "=r" (name), "=&c" (temp0), "=&r" (temp1) - : "0" (0), "1" ((const unsigned char *) name)); - - return result; -} - -#endif /* dl-hash.h */ diff 
--git a/sysdeps/i386/i686/ffs.c b/sysdeps/i386/i686/ffs.c deleted file mode 100644 index cbe36ff873..0000000000 --- a/sysdeps/i386/i686/ffs.c +++ /dev/null @@ -1,48 +0,0 @@ -/* ffs -- find first set bit in a word, counted from least significant end. - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 1991-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define ffsl __something_else -#include <string.h> - -#undef ffs - -#ifdef __GNUC__ - -int -__ffs (int x) -{ - int cnt; - int tmp; - - asm ("bsfl %2,%0\n" /* Count low bits in X and store in %1. */ - "cmovel %1,%0\n" /* If number was zero, use -1 as result. */ - : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1)); - - return cnt + 1; -} -weak_alias (__ffs, ffs) -libc_hidden_def (__ffs) -libc_hidden_builtin_def (ffs) -#undef ffsl -weak_alias (__ffs, ffsl) - -#else -#include <string/ffs.c> -#endif diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S deleted file mode 100644 index 73060b088c..0000000000 --- a/sysdeps/i386/i686/fpu/e_log.S +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for i686 instructions. 
- */ - -#include <machine/asm.h> - - - .text -ENTRY(__ieee754_log) - fldln2 // log(2) - fldl 4(%esp) // x : log(2) - fucomi %st - jp 3f - fyl2x // log(x) - ret - -3: fstp %st(1) - ret -END (__ieee754_log) - -ENTRY(__log_finite) - fldln2 // log(2) - fldl 4(%esp) // x : log(2) - fyl2x // log(x) - ret -END(__log_finite) diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S deleted file mode 100644 index 6fd39d50d3..0000000000 --- a/sysdeps/i386/i686/fpu/e_logf.S +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * Adapted for float by Ulrich Drepper <drepper@cygnus.com>. - * - * Adapted for i686 instructions. - */ - -#include <machine/asm.h> - - - .text -ENTRY(__ieee754_logf) - fldln2 // log(2) - flds 4(%esp) // x : log(2) - fucomi %st - jp 3f - fyl2x // log(x) - ret - -3: fstp %st(1) - ret -END (__ieee754_logf) - -ENTRY(__logf_finite) - fldln2 // log(2) - flds 4(%esp) // x : log(2) - fyl2x // log(x) - ret -END(__logf_finite) diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S deleted file mode 100644 index 7e3bc8d817..0000000000 --- a/sysdeps/i386/i686/fpu/e_logl.S +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Written by J.T. Conklin <jtc@netbsd.org>. - * Public domain. - * - * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. - * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>. - * Adapted for i686 instructions. - */ - -#include <machine/asm.h> - - .section .rodata.cst8,"aM",@progbits,8 - - .p2align 3 - .type one,@object -one: .double 1.0 - ASM_SIZE_DIRECTIVE(one) - /* It is not important that this constant is precise. It is only - a value which is known to be on the safe side for using the - fyl2xp1 instruction. 
*/ - .type limit,@object -limit: .double 0.29 - ASM_SIZE_DIRECTIVE(limit) - - -#ifdef PIC -# define MO(op) op##@GOTOFF(%edx) -#else -# define MO(op) op -#endif - - .text -ENTRY(__ieee754_logl) - fldln2 // log(2) - fldt 4(%esp) // x : log(2) - fucomi %st - jp 3f -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - movzwl 4+8(%esp), %eax - cmpl $0xc000, %eax - jae 5f // x <= -2, avoid overflow from -LDBL_MAX - 1. - fsubl MO(one) // x-1 : x : log(2) -5: fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2) - fcomip %st(1) // |x-1| : x-1 : x : log(2) - fstp %st(0) // x-1 : x : log(2) - jc 2f - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 4f - fabs // log(1) is +0 in all rounding modes. -4: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret - -2: fstp %st(0) // x : log(2) - fyl2x // log(x) - ret - -3: fstp %st(1) - fadd %st(0) - ret -END (__ieee754_logl) - -ENTRY(__logl_finite) - fldln2 // log(2) - fldt 4(%esp) // x : log(2) -#ifdef PIC - LOAD_PIC_REG (dx) -#endif - fld %st // x : x : log(2) - fsubl MO(one) // x-1 : x : log(2) - fld %st // x-1 : x-1 : x : log(2) - fabs // |x-1| : x-1 : x : log(2) - fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2) - fcomip %st(1) // |x-1| : x-1 : x : log(2) - fstp %st(0) // x-1 : x : log(2) - jc 2b - fxam - fnstsw - andb $0x45, %ah - cmpb $0x40, %ah - jne 6f - fabs // log(1) is +0 in all rounding modes. 
-6: fstp %st(1) // x-1 : log(2) - fyl2xp1 // log(x) - ret -END(__logl_finite) diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile deleted file mode 100644 index 7d9089232f..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -ifeq ($(subdir),math) -libm-sysdep_routines += e_expf-sse2 e_expf-ia32 s_sinf-sse2 s_cosf-sse2 \ - s_sincosf-sse2 -endif diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S deleted file mode 100644 index b486b4d1ca..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S +++ /dev/null @@ -1,22 +0,0 @@ -/* - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define __ieee754_expf __ieee754_expf_ia32 -#define __expf_finite __expf_finite_ia32 - -#include <sysdeps/i386/fpu/e_expf.S> diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S deleted file mode 100644 index e6bb6fa289..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S +++ /dev/null @@ -1,325 +0,0 @@ -/* SSE2 version of __ieee754_expf and __expf_finite - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#include <sysdep.h> - -/* Short algorithm description: - * - * Let K = 64 (table size). - * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) - * where - * x = m*log(2)/K + y, y in [0.0..log(2)/K] - * m = n*K + j, m,n,j - signed integer, j in [0..K-1] - * values of 2^(j/K) are tabulated as T[j]. - * - * P(y) is a minimax polynomial approximation of expf(x)-1 - * on small interval [0.0..log(2)/K]. - * - * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as - * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y - * - * Special cases: - * __ieee754_expf_sse2(NaN) = NaN - * __ieee754_expf_sse2(+INF) = +INF - * __ieee754_expf_sse2(-INF) = 0 - * __ieee754_expf_sse2(x) = 1 for subnormals - * for finite argument, only __ieee754_expf_sse2(0)=1 is exact - * __ieee754_expf_sse2(x) overflows if x>700 - * __ieee754_expf_sse2(x) underflows if x<-700 - * - * Note: - * For |x|<700, __ieee754_expf_sse2 computes result in double precision, - * with accuracy a bit more than needed for expf, and does not round it - * to single precision. 
- */ - - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%edx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%edx,reg2,_scale) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -#endif - - .text -ENTRY(__ieee754_expf_sse2) - /* Input: single precision x on stack at address 4(%esp) */ - -#ifdef PIC - LOAD_PIC_REG(dx) -#endif - - cvtss2sd 4(%esp), %xmm1 /* Convert x to double precision */ - mov 4(%esp), %ecx /* Copy x */ - movsd MO1(DP_KLN2), %xmm2 /* DP K/log(2) */ - movsd MO1(DP_P2), %xmm3 /* DP P2 */ - movl %ecx, %eax /* x */ - mulsd %xmm1, %xmm2 /* DP x*K/log(2) */ - andl $0x7fffffff, %ecx /* |x| */ - cmpl $0x442f0000, %ecx /* |x|<700 ? */ - movsd MO1(DP_P3), %xmm4 /* DP P3 */ - addsd MO1(DP_RS), %xmm2 /* DP x*K/log(2)+RS */ - jae L(special_paths) - - /* Here if |x|<700 */ - cmpl $0x31800000, %ecx /* |x|<2^(-28) ? */ - jb L(small_arg) - - /* Main path: here if 2^(-28)<=|x|<700 */ - cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */ - movd %xmm2, %eax /* bits of n*K+j with trash */ - subss MO1(SP_RS), %xmm2 /* SP t=round(x*K/log(2)) */ - movl %eax, %ecx /* n*K+j with trash */ - cvtss2sd %xmm2, %xmm2 /* DP t */ - andl $0x3f, %eax /* bits of j */ - mulsd MO1(DP_NLN2K), %xmm2 /* DP -t*log(2)/K */ - andl $0xffffffc0, %ecx /* bits of n */ -#ifdef __AVX__ - vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */ - vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */ -#else - addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */ - movaps %xmm2, %xmm0 /* DP y */ - mulsd %xmm2, %xmm2 /* DP z=y*y */ -#endif - mulsd %xmm2, %xmm4 /* DP P3*z */ - addl $0xffc0, %ecx /* bits of n + DP exponent bias */ - mulsd %xmm2, %xmm3 /* DP P2*z */ - shrl $2, %ecx /* High 2 bytes of DP 2^n */ - pxor %xmm1, %xmm1 /* clear %xmm1 */ - addsd MO1(DP_P1), %xmm4 /* DP P3*z+P1 */ - addsd MO1(DP_P0), %xmm3 /* DP P2*z+P0 */ - pinsrw $3, %ecx, %xmm1 /* DP 2^n */ - mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */ - mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */ - addsd %xmm4, %xmm0 /* DP P(y) 
*/ - mulsd MO2(DP_T,%eax,8), %xmm0 /* DP P(y)*T[j] */ - addsd MO2(DP_T,%eax,8), %xmm0 /* DP T[j]*(P(y)+1) */ - mulsd %xmm1, %xmm0 /* DP result=2^n*(T[j]*(P(y)+1)) */ - cvtsd2ss %xmm0, %xmm1 - - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm1, 0(%esp) /* Move result from sse... */ - flds 0(%esp) /* ...to FPU. */ - lea 4(%esp), %esp /* Return back 4 bytes of stack frame */ - ret - - .p2align 4 -L(small_arg): - /* Here if 0<=|x|<2^(-28) */ - movss 4(%esp), %xmm0 /* load x */ - addss MO1(SP_ONE), %xmm0 /* 1.0 + x */ - /* Return 1.0 with inexact raised, except for x==0 */ - jmp L(epilogue) - - .p2align 4 -L(special_paths): - /* Here if x is NaN, or Inf, or finite |x|>=700 */ - movss 4(%esp), %xmm0 /* load x */ - - cmpl $0x7f800000, %ecx /* |x| is finite ? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=700 */ - testl $0x80000000, %eax /* sign of x nonzero ? */ - je L(res_overflow) - - /* Here if finite x<=-700 */ - movss MO1(SP_SMALL), %xmm0 /* load small value 2^(-100) */ - mulss %xmm0, %xmm0 /* Return underflowed result (zero or subnormal) */ - jmp L(epilogue) - - .p2align 4 -L(res_overflow): - /* Here if finite x>=700 */ - movss MO1(SP_LARGE), %xmm0 /* load large value 2^100 */ - mulss %xmm0, %xmm0 /* Return overflowed result (Inf or max normal) */ - jmp L(epilogue) - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(arg_nan) /* |x| is Inf ? */ - - /* Here if |x| is Inf */ - shrl $31, %eax /* Get sign bit of x */ - movss MO2(SP_INF_0,%eax,4), %xmm0/* return zero or Inf, depending on sign of x */ - jmp L(epilogue) - - .p2align 4 -L(arg_nan): - /* Here if |x| is NaN */ - addss %xmm0, %xmm0 /* Return x+x (raise invalid) */ - - .p2align 4 -L(epilogue): - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm0, 0(%esp) /* Move result from sse... */ - flds 0(%esp) /* ...to FPU. 
*/ - lea 4(%esp), %esp /* Return back 4 bytes of stack frame */ - ret -END(__ieee754_expf_sse2) - - .section .rodata, "a" - .p2align 3 -L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */ - .long 0x00000000, 0x3ff00000 - .long 0x3e778061, 0x3ff02c9a - .long 0xd3158574, 0x3ff059b0 - .long 0x18759bc8, 0x3ff08745 - .long 0x6cf9890f, 0x3ff0b558 - .long 0x32d3d1a2, 0x3ff0e3ec - .long 0xd0125b51, 0x3ff11301 - .long 0xaea92de0, 0x3ff1429a - .long 0x3c7d517b, 0x3ff172b8 - .long 0xeb6fcb75, 0x3ff1a35b - .long 0x3168b9aa, 0x3ff1d487 - .long 0x88628cd6, 0x3ff2063b - .long 0x6e756238, 0x3ff2387a - .long 0x65e27cdd, 0x3ff26b45 - .long 0xf51fdee1, 0x3ff29e9d - .long 0xa6e4030b, 0x3ff2d285 - .long 0x0a31b715, 0x3ff306fe - .long 0xb26416ff, 0x3ff33c08 - .long 0x373aa9cb, 0x3ff371a7 - .long 0x34e59ff7, 0x3ff3a7db - .long 0x4c123422, 0x3ff3dea6 - .long 0x21f72e2a, 0x3ff4160a - .long 0x6061892d, 0x3ff44e08 - .long 0xb5c13cd0, 0x3ff486a2 - .long 0xd5362a27, 0x3ff4bfda - .long 0x769d2ca7, 0x3ff4f9b2 - .long 0x569d4f82, 0x3ff5342b - .long 0x36b527da, 0x3ff56f47 - .long 0xdd485429, 0x3ff5ab07 - .long 0x15ad2148, 0x3ff5e76f - .long 0xb03a5585, 0x3ff6247e - .long 0x82552225, 0x3ff66238 - .long 0x667f3bcd, 0x3ff6a09e - .long 0x3c651a2f, 0x3ff6dfb2 - .long 0xe8ec5f74, 0x3ff71f75 - .long 0x564267c9, 0x3ff75feb - .long 0x73eb0187, 0x3ff7a114 - .long 0x36cf4e62, 0x3ff7e2f3 - .long 0x994cce13, 0x3ff82589 - .long 0x9b4492ed, 0x3ff868d9 - .long 0x422aa0db, 0x3ff8ace5 - .long 0x99157736, 0x3ff8f1ae - .long 0xb0cdc5e5, 0x3ff93737 - .long 0x9fde4e50, 0x3ff97d82 - .long 0x82a3f090, 0x3ff9c491 - .long 0x7b5de565, 0x3ffa0c66 - .long 0xb23e255d, 0x3ffa5503 - .long 0x5579fdbf, 0x3ffa9e6b - .long 0x995ad3ad, 0x3ffae89f - .long 0xb84f15fb, 0x3ffb33a2 - .long 0xf2fb5e47, 0x3ffb7f76 - .long 0x904bc1d2, 0x3ffbcc1e - .long 0xdd85529c, 0x3ffc199b - .long 0x2e57d14b, 0x3ffc67f1 - .long 0xdcef9069, 0x3ffcb720 - .long 0x4a07897c, 0x3ffd072d - .long 0xdcfba487, 0x3ffd5818 - .long 0x03db3285, 
0x3ffda9e6 - .long 0x337b9b5f, 0x3ffdfc97 - .long 0xe78b3ff6, 0x3ffe502e - .long 0xa2a490da, 0x3ffea4af - .long 0xee615a27, 0x3ffefa1b - .long 0x5b6e4540, 0x3fff5076 - .long 0x819e90d8, 0x3fffa7c1 - .type L(DP_T), @object - ASM_SIZE_DIRECTIVE(L(DP_T)) - - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3 -L(DP_KLN2): /* double precision K/log(2) */ - .long 0x652b82fe, 0x40571547 - .type L(DP_KLN2), @object - ASM_SIZE_DIRECTIVE(L(DP_KLN2)) - - .p2align 3 -L(DP_NLN2K): /* double precision -log(2)/K */ - .long 0xfefa39ef, 0xbf862e42 - .type L(DP_NLN2K), @object - ASM_SIZE_DIRECTIVE(L(DP_NLN2K)) - - .p2align 3 -L(DP_RS): /* double precision 2^23+2^22 */ - .long 0x00000000, 0x41680000 - .type L(DP_RS), @object - ASM_SIZE_DIRECTIVE(L(DP_RS)) - - .p2align 3 -L(DP_P3): /* double precision polynomial coefficient P3 */ - .long 0xeb78fa85, 0x3fa56420 - .type L(DP_P3), @object - ASM_SIZE_DIRECTIVE(L(DP_P3)) - - .p2align 3 -L(DP_P1): /* double precision polynomial coefficient P1 */ - .long 0x008d6118, 0x3fe00000 - .type L(DP_P1), @object - ASM_SIZE_DIRECTIVE(L(DP_P1)) - - .p2align 3 -L(DP_P2): /* double precision polynomial coefficient P2 */ - .long 0xda752d4f, 0x3fc55550 - .type L(DP_P2), @object - ASM_SIZE_DIRECTIVE(L(DP_P2)) - - .p2align 3 -L(DP_P0): /* double precision polynomial coefficient P0 */ - .long 0xffffe7c6, 0x3fefffff - .type L(DP_P0), @object - ASM_SIZE_DIRECTIVE(L(DP_P0)) - - .p2align 2 -L(SP_INF_0): - .long 0x7f800000 /* single precision Inf */ - .long 0 /* single precision zero */ - .type L(SP_INF_0), @object - ASM_SIZE_DIRECTIVE(L(SP_INF_0)) - - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2 -L(SP_RS): /* single precision 2^23+2^22 */ - .long 0x4b400000 - .type L(SP_RS), @object - ASM_SIZE_DIRECTIVE(L(SP_RS)) - - .p2align 2 -L(SP_SMALL): /* single precision small value 2^(-100) */ - .long 0x0d800000 - .type L(SP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(SP_SMALL)) - - .p2align 2 -L(SP_LARGE): /* single precision large value 2^100 */ - .long 0x71800000 - 
.type L(SP_LARGE), @object - ASM_SIZE_DIRECTIVE(L(SP_LARGE)) - - .p2align 2 -L(SP_ONE): /* single precision 1.0 */ - .long 0x3f800000 - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -strong_alias (__ieee754_expf_sse2, __expf_finite_sse2) diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf.c b/sysdeps/i386/i686/fpu/multiarch/e_expf.c deleted file mode 100644 index 388cf98a39..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/e_expf.c +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of expf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern double __ieee754_expf_sse2 (double); -extern double __ieee754_expf_ia32 (double); - -double __ieee754_expf (double); -libm_ifunc (__ieee754_expf, - HAS_CPU_FEATURE (SSE2) - ? __ieee754_expf_sse2 - : __ieee754_expf_ia32); - -extern double __expf_finite_sse2 (double); -extern double __expf_finite_ia32 (double); - -double __expf_finite (double); -libm_ifunc (__expf_finite, - HAS_CPU_FEATURE (SSE2) - ? 
__expf_finite_sse2 - : __expf_finite_ia32); diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps deleted file mode 100644 index 04bc23b37b..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +++ /dev/null @@ -1,2188 +0,0 @@ -# Begin of automatic generation - -# Maximal error of functions: -Function: "acos": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "acos_downward": -ildouble: 2 -ldouble: 2 - -Function: "acos_towardzero": -ildouble: 2 -ldouble: 2 - -Function: "acos_upward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "acosh": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 2 - -Function: "acosh_downward": -double: 1 -idouble: 1 -ildouble: 6 -ldouble: 4 - -Function: "acosh_towardzero": -double: 1 -idouble: 1 -ildouble: 6 -ldouble: 4 - -Function: "acosh_upward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 3 - -Function: "asin": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "asin_downward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "asin_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "asin_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "asinh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "asinh_downward": -double: 1 -float: 1 -idouble: 1 -ildouble: 5 -ldouble: 5 - -Function: "asinh_towardzero": -double: 1 -float: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "asinh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "atan": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan2_upward": -double: 1 -float: 1 -idouble: 1 
-ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atan_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "atanh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "atanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 4 - -Function: "atanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 3 - -Function: "atanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "cabs": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "cabs_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cacos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "cacos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cacos_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Real part of "cacos_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Real part of "cacos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacos_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Real part of "cacosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 
-ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cacosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cacosh_downward": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cacosh_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cacosh_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "cacosh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "carg": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "carg_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "casin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "casin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "casin_downward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "casin_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Real part of "casin_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "casin_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Real part of "casin_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - 
-Function: Imaginary part of "casin_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Real part of "casinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "casinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "casinh_downward": -double: 5 -float: 3 -idouble: 5 -ifloat: 3 -ildouble: 6 -ldouble: 6 - -Function: Imaginary part of "casinh_downward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "casinh_towardzero": -double: 4 -float: 3 -idouble: 4 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "casinh_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "casinh_upward": -double: 7 -float: 7 -idouble: 7 -ifloat: 7 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "casinh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "catan": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catan_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "catan_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "catan_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catan_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "catanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 
-ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "catanh": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "catanh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "catanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "cbrt": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "cbrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "cbrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "cbrt_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ccos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ccos": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ccos_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccos_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccos_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccos_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: 
Imaginary part of "ccos_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "ccosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ccosh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ccosh_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccosh_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccosh_towardzero": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ccosh_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ccosh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "ccosh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cexp": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "cexp": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cexp_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cexp_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cexp_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cexp_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "cexp_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cexp_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 
-ldouble: 3 - -Function: Imaginary part of "clog": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog10": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "clog10": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "clog10_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 8 -ldouble: 8 - -Function: Imaginary part of "clog10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog10_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 8 -ldouble: 8 - -Function: Imaginary part of "clog10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog10_upward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "clog10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "clog_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "clog_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "clog_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "clog_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "clog_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "cos": -ildouble: 1 -ldouble: 1 - -Function: "cos_downward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "cos_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "cos_upward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh": -double: 1 -float: 1 
-idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh_downward": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 3 - -Function: "cosh_towardzero": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "cosh_upward": -double: 4 -float: 2 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 3 - -Function: Real part of "cpow": -double: 2 -float: 5 -idouble: 2 -ifloat: 5 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "cpow": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "cpow_downward": -double: 5 -float: 8 -idouble: 5 -ifloat: 8 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "cpow_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "cpow_towardzero": -double: 5 -float: 8 -idouble: 5 -ifloat: 8 -ildouble: 7 -ldouble: 7 - -Function: Imaginary part of "cpow_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 1 -ldouble: 1 - -Function: Real part of "cpow_upward": -double: 4 -float: 1 -idouble: 4 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "cpow_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "csin": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 - -Function: Real part of "csin_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csin_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csin_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csin_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csin_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - 
-Function: Imaginary part of "csin_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "csinh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "csinh_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_downward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh_towardzero": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csinh_upward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "csinh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Real part of "csqrt": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: Imaginary part of "csqrt": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "csqrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "csqrt_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "csqrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "csqrt_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "csqrt_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "csqrt_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 
-ldouble: 2 - -Function: Imaginary part of "ctan": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Real part of "ctan_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "ctan_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan_towardzero": -double: 3 -float: 1 -idouble: 3 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: Imaginary part of "ctan_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctan_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ctan_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ctanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: Imaginary part of "ctanh": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: Real part of "ctanh_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "ctanh_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Real part of "ctanh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: Imaginary part of "ctanh_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Real part of "ctanh_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: Imaginary part of "ctanh_upward": -double: 3 -float: 2 -idouble: 3 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "erf": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erf_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erf_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 
- -Function: "erf_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "erfc": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "erfc_downward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "erfc_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "erfc_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "exp": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp2_downward": -ildouble: 1 -ldouble: 1 - -Function: "exp2_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp2_upward": -ildouble: 1 -ldouble: 1 - -Function: "exp_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "exp_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "exp_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "expm1": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "expm1_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "expm1_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "expm1_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "gamma": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "gamma_downward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - 
-Function: "gamma_towardzero": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "gamma_upward": -double: 3 -float: 4 -idouble: 3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "hypot": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_towardzero": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "hypot_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "j0": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "j0_downward": -double: 1 -float: 3 -idouble: 1 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "j0_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 5 -ldouble: 5 - -Function: "j0_upward": -double: 1 -float: 3 -idouble: 1 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "j1": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "j1_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 4 -ldouble: 4 - -Function: "j1_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "j1_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 3 -ldouble: 3 - -Function: "jn": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "jn_downward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "jn_towardzero": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "jn_upward": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "lgamma": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "lgamma_downward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "lgamma_towardzero": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 7 -ldouble: 7 - -Function: "lgamma_upward": -double: 3 -float: 4 -idouble: 
3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "log": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "log10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "log10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log1p": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log1p_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "log1p_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 4 - -Function: "log1p_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "log2": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_downward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log2_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "log_downward": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "log_upward": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow10": -double: 1 -idouble: 1 -ildouble: 1 -ldouble: 1 - -Function: "pow10_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow_downward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "pow_towardzero": -double: 1 -idouble: 1 
-ildouble: 4 -ldouble: 4 - -Function: "pow_upward": -double: 1 -idouble: 1 -ildouble: 4 -ldouble: 4 - -Function: "sin": -ildouble: 1 -ldouble: 1 - -Function: "sin_downward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sin_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "sin_upward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sincos": -ildouble: 1 -ldouble: 1 - -Function: "sincos_downward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sincos_towardzero": -double: 1 -idouble: 1 -ildouble: 2 -ldouble: 2 - -Function: "sincos_upward": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "sinh": -double: 1 -ildouble: 2 -ldouble: 2 - -Function: "sinh_downward": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 5 - -Function: "sinh_towardzero": -double: 2 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 4 - -Function: "sinh_upward": -double: 4 -float: 2 -idouble: 1 -ifloat: 1 -ildouble: 4 -ldouble: 5 - -Function: "tan": -float: 1 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "tan_downward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "tan_towardzero": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "tan_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "tanh": -double: 1 -idouble: 1 -ildouble: 3 -ldouble: 3 - -Function: "tanh_downward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 7 -ldouble: 4 - -Function: "tanh_towardzero": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 3 -ldouble: 3 - -Function: "tanh_upward": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 5 -ldouble: 4 - -Function: "tgamma": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_downward": -double: 3 -float: 4 -idouble: 3 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_towardzero": -double: 4 -float: 4 
-idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "tgamma_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 - -Function: "y0": -double: 1 -float: 1 -idouble: 1 -ifloat: 1 -ildouble: 1 -ldouble: 1 - -Function: "y0_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y0_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y0_upward": -double: 1 -float: 2 -idouble: 1 -ifloat: 2 -ildouble: 3 -ldouble: 3 - -Function: "y1": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 - -Function: "y1_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 7 -ldouble: 7 - -Function: "y1_towardzero": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "y1_upward": -double: 1 -float: 3 -idouble: 1 -ifloat: 3 -ildouble: 7 -ldouble: 7 - -Function: "yn": -double: 2 -float: 3 -idouble: 2 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -Function: "yn_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 5 -ldouble: 5 - -Function: "yn_towardzero": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 5 -ldouble: 5 - -Function: "yn_upward": -double: 3 -float: 3 -idouble: 3 -ifloat: 3 -ildouble: 4 -ldouble: 4 - -# end of automatic generation diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name deleted file mode 100644 index 193dd704b3..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name +++ /dev/null @@ -1 +0,0 @@ -i686 diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S deleted file mode 100644 index f37850d0b3..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S +++ /dev/null @@ -1,553 +0,0 @@ -/* Optimized with sse2 version of cosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x| == 0: return 1.0-|x|. - * 2) if |x| < 2^-27: return 1.0-|x|. - * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^5*DP_COS2_1. - * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3, - * t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * s = (-1.0)^((n>>2)&1) - * if(n&2 != 0) { - * using cos(t) polynomial for |t|<Pi/4, result is - * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))). - * } else { - * using sin(t) polynomial for |t|<Pi/4, result is - * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))). - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3, - * t=|x|-j*Pi/4. - * 6.2) Reconstruction same as (5.2). - * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3, - * t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. 
- * - * Special cases: - * cos(+-0) = 1 not raising inexact, - * cos(subnormal) raises inexact, - * cos(min_normalized) raises inexact, - * cos(normalized) raises inexact, - * cos(Inf) = NaN, raises invalid, sets errno to EDOM, - * cos(NaN) = NaN. - */ - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) -# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) -# define PUSH(REG) pushl REG; CFI_PUSH(REG) -# define POP(REG) popl REG; CFI_POP(REG) -# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) -# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) -# define ARG_X 8(%esp) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define ENTRANCE -# define RETURN ret -# define ARG_X 4(%esp) -#endif - - .text -ENTRY(__cosf_sse2) - /* Input: single precision x on stack at address ARG_X */ - - ENTRANCE - movl ARG_X, %eax /* Bits of x */ - cvtss2sd ARG_X, %xmm0 /* DP x */ - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - movd %eax, %xmm3 /* SP |x| */ - andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ - movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - addl $1, %eax /* k+1 */ - movl $0x0e, %edx - andl %eax, %edx /* j = (k+1)&0x0e */ - addl $2, %eax /* n */ - subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t */ - testl $2, %eax /* n&2 != 0? 
*/ - jz L(sin_poly) - -/*L(cos_poly):*/ - /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))) - */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_C4), %xmm4 /* C4 */ - mulsd %xmm0, %xmm4 /* z*C4 */ - movsd MO1(DP_C3), %xmm3 /* C3 */ - mulsd %xmm0, %xmm3 /* z*C3 */ - addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */ - mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */ - mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */ - addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */ - - addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd MO1(DP_ONES), %xmm3 - - mulsd MO2(DP_ONES,%eax,8), %xmm3 /* DP result */ - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. 
*/ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(sin_poly): - /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))) - */ - - movaps %xmm0, %xmm4 /* t */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_S4), %xmm2 /* S4 */ - mulsd %xmm0, %xmm2 /* z*S4 */ - movsd MO1(DP_S3), %xmm3 /* S3 */ - mulsd %xmm0, %xmm3 /* z*S3 */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */ - mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */ - addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */ - mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */ - addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ - /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */ - mulsd MO2(DP_ONES,%eax,8), %xmm4 - addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulsd %xmm4, %xmm3 - /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23? 
*/ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - addl $2, %eax /* n */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */ - subl $68, %eax - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - /* clear unneeded remainder from %ah */ - andl $0xff, %eax - - imull $28, %eax, %ecx /* j*28 */ - movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */ - mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19? 
*/ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd MO1(DP_2POW52), %xmm6 - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+MO1(DP_2POW52), %xmm4 - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $3, %eax /* n=k+3 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */ - - jmp L(reconstruction) /* end of very_large_args peth */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5? */ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - mulsd %xmm0, %xmm0 /* y=x^2 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=x^4 */ - movsd MO1(DP_C4), %xmm3 /* C4 */ - mulsd %xmm0, %xmm3 /* z*C4 */ - movsd MO1(DP_C3), %xmm5 /* C3 */ - mulsd %xmm0, %xmm5 /* z*C3 */ - addsd MO1(DP_C2), %xmm3 /* C2+z*C4 */ - mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */ - addsd MO1(DP_C1), %xmm5 /* C1+z*C3 */ - mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */ - addsd MO1(DP_C0), %xmm3 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */ - addsd %xmm5, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd MO1(DP_ONES), %xmm3 - cvtsd2ss %xmm3, %xmm3 /* SP result */ - -L(epilogue): - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm3, 0(%esp) /* Move result from sse... 
*/ - flds 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 4(%esp), %esp - RETURN - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27? */ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - movsd MO1(DP_COS2_1), %xmm3 /* DP DP_COS2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */ - addsd MO1(DP_COS2_0), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */ - /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */ - addsd MO1(DP_ONES), %xmm3 - cvtsd2ss %xmm3, %xmm3 /* SP result */ - jmp L(epilogue) - - .p2align 4 -L(arg_less_2pn27): - /* Here if |x|<2^-27 */ - movss ARG_X, %xmm0 /* x */ - andps MO1(SP_ABS_MASK),%xmm0 /* |x| */ - movss MO1(SP_ONE), %xmm3 /* 1.0 */ - subss %xmm0, %xmm3 /* result is 1.0-|x| */ - jmp L(epilogue) - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Here if x is Inf. Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - movl $EDOM, (%eax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. 
*/ - movss ARG_X, %xmm3 /* load x */ - subss %xmm3, %xmm3 /* Result is NaN */ - jmp L(epilogue) -END(__cosf_sse2) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - .long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomial - for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5. 
*/ - .p2align 3 -L(DP_COS2_0): - .long 0xff5cc6fd,0xbfdfffff - .type L(DP_COS2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_COS2_0)) - - .p2align 3 -L(DP_COS2_1): - .long 0xb178dac5,0x3fa55514 - .type L(DP_COS2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_COS2_1)) - - .p2align 3 -L(DP_ZERONE): - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* -1.0 (sign bit set, same encoding as DP_ONES[1]) */ - .type L(DP_ZERONE),@object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomial - for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */ - .p2align 3 -L(DP_S3): - .long 0x64e6b5b4,0x3ec71d72 - .type L(DP_S3), @object - ASM_SIZE_DIRECTIVE(L(DP_S3)) - - .p2align 3 -L(DP_S1): - .long 0x10c2688b,0x3f811111 - .type L(DP_S1), @object - ASM_SIZE_DIRECTIVE(L(DP_S1)) - - .p2align 3 -L(DP_S4): - .long 0x1674b58a,0xbe5a947e - .type L(DP_S4), @object - ASM_SIZE_DIRECTIVE(L(DP_S4)) - - .p2align 3 -L(DP_S2): - .long 0x8b4bd1f9,0xbf2a019f - .type L(DP_S2), @object - ASM_SIZE_DIRECTIVE(L(DP_S2)) - - .p2align 3 -L(DP_S0): - .long 0x55551cd9,0xbfc55555 - .type L(DP_S0), @object - ASM_SIZE_DIRECTIVE(L(DP_S0)) - -/* Coefficients of polynomial - for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. 
*/ - .p2align 3 -L(DP_C3): - .long 0x9ac43cc0,0x3efa00eb - .type L(DP_C3), @object - ASM_SIZE_DIRECTIVE(L(DP_C3)) - - .p2align 3 -L(DP_C1): - .long 0x545c50c7,0x3fa55555 - .type L(DP_C1), @object - ASM_SIZE_DIRECTIVE(L(DP_C1)) - - .p2align 3 -L(DP_C4): - .long 0xdd8844d7,0xbe923c97 - .type L(DP_C4), @object - ASM_SIZE_DIRECTIVE(L(DP_C4)) - - .p2align 3 -L(DP_C2): - .long 0x348b6874,0xbf56c16b - .type L(DP_C2), @object - ASM_SIZE_DIRECTIVE(L(DP_C2)) - - .p2align 3 -L(DP_C0): - .long 0xfffe98ae,0xbfdfffff - .type L(DP_C0), @object - ASM_SIZE_DIRECTIVE(L(DP_C0)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 */ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of -Pi/4 (stored negated: sign bit set) */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): - .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 (stored negated: sign bit set) */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - - .p2align 4 -L(SP_ABS_MASK): /* Mask for getting SP absolute value */ - .long 0x7fffffff,0x7fffffff - .long 0x7fffffff,0x7fffffff - .type L(SP_ABS_MASK), @object - 
ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK)) - - .p2align 2 -L(SP_ONE): - .long 0x3f800000 /* 1.0 */ - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -weak_alias (__cosf, cosf) diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c deleted file mode 100644 index af588de9dc..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c +++ /dev/null @@ -1,29 +0,0 @@ -/* Multiple versions of cosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern float __cosf_sse2 (float); -extern float __cosf_ia32 (float); -float __cosf (float); - -libm_ifunc (__cosf, HAS_CPU_FEATURE (SSE2) ? __cosf_sse2 : __cosf_ia32); -weak_alias (__cosf, cosf); - -#define COSF __cosf_ia32 -#include <sysdeps/ieee754/flt-32/s_cosf.c> diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S deleted file mode 100644 index f31a925522..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S +++ /dev/null @@ -1,586 +0,0 @@ -/* Optimized with sse2 version of sincosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x|==0: sin(x)=x, - * cos(x)=1. - * 2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed, - * cos(x)=1-|x|. - * 3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1, - * cos(x)=1+1*x^2*DP_COS2_0+x^4*DP_COS2_1 - * 4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))), - * cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: - * k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * sign_sin = sign(x) * (-1.0)^(( n >>2)&1) - * sign_cos = (-1.0)^(((n+2)>>2)&1) - * poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t - * poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s - * if(n&2 != 0) { - * using cos(t) and sin(t) polynomials for |t|<Pi/4, results are - * cos(x) = poly_sin * sign_cos - * sin(x) = poly_cos * sign_sin - * } else { - * sin(x) = poly_sin * sign_sin - * cos(x) = poly_cos * sign_cos - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: - * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4 - * 6.2) Reconstruction same as (5.2). 
- * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: - * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. - * - * Special cases: - * sin/cos(+-0) = +-0/1 not raising inexact/underflow, - * sin/cos(subnormal) raises inexact/underflow, - * sin/cos(min_normalized) raises inexact/underflow, - * sin/cos(normalized) raises inexact, - * sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM, - * sin/cos(NaN) = NaN. - */ - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) -# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) -# define PUSH(REG) pushl REG; CFI_PUSH(REG) -# define POP(REG) popl REG; CFI_POP(REG) -# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) -# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) -# define ARG_X 8(%esp) -# define ARG_SIN_PTR 12(%esp) -# define ARG_COS_PTR 16(%esp) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define ENTRANCE -# define RETURN ret -# define ARG_X 4(%esp) -# define ARG_SIN_PTR 8(%esp) -# define ARG_COS_PTR 12(%esp) -#endif - - .text -ENTRY(__sincosf_sse2) - /* Input: single precision x on stack at address ARG_X */ - /* pointer to sin result on stack at address ARG_SIN_PTR */ - /* pointer to cos result on stack at address ARG_COS_PTR */ - - ENTRANCE - movl ARG_X, %eax /* Bits of x */ - cvtss2sd ARG_X, %xmm0 /* DP x */ - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4 ? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - movd %eax, %xmm3 /* SP |x| */ - andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ - movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4 ? 
*/ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - movl ARG_X, %ecx /* Load x */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - shrl $29, %ecx /* (sign of x) << 2 */ - addl $1, %eax /* n = k+1 */ - movl $0x0e, %edx /* mask for j */ - andl %eax, %edx /* j = (k+1)&0x0e */ - subsd MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t, %ecx=x>>29 (sign of x in bit 2) */ - - movaps %xmm0, %xmm4 /* t */ - movhpd MO1(DP_ONES), %xmm4 /* 1|t */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - movl $2, %edx - unpcklpd %xmm0, %xmm0 /* y|y */ - addl %eax, %edx /* n+2 */ - movaps %xmm0, %xmm1 /* y|y */ - mulpd %xmm0, %xmm0 /* z=t^4|z=t^4 */ - - movaps MO1(DP_SC4), %xmm2 /* S4 */ - mulpd %xmm0, %xmm2 /* z*S4 */ - movaps MO1(DP_SC3), %xmm3 /* S3 */ - mulpd %xmm0, %xmm3 /* z*S3 */ - xorl %eax, %ecx /* bit 2 = sign_x ^ ((n>>2)&1) */ - addpd MO1(DP_SC2), %xmm2 /* S2+z*S4 */ - mulpd %xmm0, %xmm2 /* z*(S2+z*S4) */ - shrl $2, %edx /* (n+2)>>2 */ - addpd MO1(DP_SC1), %xmm3 /* S1+z*S3 */ - mulpd %xmm0, %xmm3 /* z*(S1+z*S3) */ - shrl $2, %ecx /* bit 0 = sign_x ^ ((n>>2)&1) */ - addpd MO1(DP_SC0), %xmm2 /* S0+z*(S2+z*S4) */ - andl $1, %edx /* sign_cos = ((n+2)>>2)&1 */ - mulpd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ - andl $1, %ecx /* sign_sin = sign_x ^ ((n>>2)&1) */ - addpd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulpd %xmm4, %xmm3 /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/ - testl $2, %eax /* n&2 != 0 ? 
*/ - addpd %xmm4, %xmm3 /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/ - jnz L(sin_result_sin_poly) - -/*L(sin_result_cos_poly):*/ - /* - * Here if n&2 == 0 (jnz not taken): low lane is poly_sin, high lane is poly_cos - * sin(x) = poly_sin * sign_sin - * cos(x) = poly_cos * sign_cos - */ - movsd MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */ - movhpd MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */ - mulpd %xmm4, %xmm3 /* result_cos|result_sin */ - movl ARG_SIN_PTR, %eax - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movl ARG_COS_PTR, %ecx - movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ - movss %xmm0, (%ecx) /* store cos(x) */ - RETURN - - .p2align 4 -L(sin_result_sin_poly): - /* - * Here if n&2 != 0 (the label name notwithstanding): - * cos(x) = poly_sin * sign_cos - * sin(x) = poly_cos * sign_sin - */ - movsd MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */ - movhpd MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */ - mulpd %xmm4, %xmm3 /* result_sin|result_cos */ - movl ARG_SIN_PTR, %eax - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movl ARG_COS_PTR, %ecx - movss %xmm0, (%ecx) /* store cos(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move sin(x) to xmm0[0] */ - movss %xmm0, (%eax) /* store sin(x) */ - RETURN - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN ? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23 ? 
*/ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movl ARG_X, %ecx /* Load x */ - movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - shrl $29, %ecx /* (sign of x) << 2 */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - subl $68, %eax /* bitpos=eb-0x7f+59, where 0x7f */ - /*is exponent bias */ - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - andl $0xff, %eax /* clear unneeded remainder from %ah*/ - - imull $28, %eax, %ecx /* j*28 */ - movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */ - mulsd 0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19 ? 
*/ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd MO1(DP_2POW52), %xmm6 /* +2^52 */ - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx /* mask for k&1 */ - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+MO1(DP_2POW52), %xmm4 /* -2^52 */ - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - movl ARG_X, %ecx /* Load x */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5 ? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd MO2(DP_ZERONE,%edx,8), %xmm3/* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - shrl $29, %ecx /* (sign of x) << 2 */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $1, %eax /* n=k+1 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd MO1(DP_PIO4), %xmm0 /* t *= PIO4 */ - - jmp L(reconstruction) /* end of very_large_args path */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5 ? 
*/ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - movaps %xmm0, %xmm3 /* DP x */ - movhpd MO1(DP_ONES), %xmm3 /* DP 1|x */ - mulsd %xmm0, %xmm0 /* DP y=x^2 */ - unpcklpd %xmm0, %xmm0 /* DP y|y */ - movaps %xmm0, %xmm1 /* y|y */ - mulpd %xmm0, %xmm0 /* z=x^4|z=x^4 */ - - movapd MO1(DP_SC4), %xmm4 /* S4 */ - mulpd %xmm0, %xmm4 /* z*S4 */ - movapd MO1(DP_SC3), %xmm5 /* S3 */ - mulpd %xmm0, %xmm5 /* z*S3 */ - addpd MO1(DP_SC2), %xmm4 /* S2+z*S4 */ - mulpd %xmm0, %xmm4 /* z*(S2+z*S4) */ - addpd MO1(DP_SC1), %xmm5 /* S1+z*S3 */ - mulpd %xmm0, %xmm5 /* z*(S1+z*S3) */ - addpd MO1(DP_SC0), %xmm4 /* S0+z*(S2+z*S4) */ - mulpd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */ - mulpd %xmm3, %xmm5 /* x*z*(S1+z*S3) */ - mulpd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */ - addpd %xmm5, %xmm4 /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/ - movl ARG_SIN_PTR, %eax - addpd %xmm4, %xmm3 /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/ - movl ARG_COS_PTR, %ecx - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ - movss %xmm0, (%ecx) /* store cos(x) */ - RETURN - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27 ? 
*/ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - movaps %xmm0, %xmm1 /* DP x */ - movhpd MO1(DP_ONES), %xmm1 /* DP 1|x */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - unpcklpd %xmm0, %xmm0 /* DP x^2|x^2 */ - - movaps MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */ - mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */ - addpd MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */ - mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */ - mulpd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - addpd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - movl ARG_SIN_PTR, %eax - cvtpd2ps %xmm3, %xmm0 /* SP results */ - movl ARG_COS_PTR, %ecx - movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */ - shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */ - movss %xmm0, (%ecx) /* store cos(x) */ - RETURN - - .p2align 4 -L(arg_less_2pn27): - movss ARG_X, %xmm7 /* SP x */ - cmpl $0, %eax /* x=0 ? */ - je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */ - /* Here if |x|<2^-27 */ - /* - * Special cases here: - * sin(subnormal) raises inexact/underflow - * sin(min_normalized) raises inexact/underflow - * sin(normalized) raises inexact - * cos(here)=1-|x| (raising inexact) - */ - movaps %xmm0, %xmm3 /* DP x */ - mulsd MO1(DP_SMALL), %xmm0 /* DP x*DP_SMALL */ - subsd %xmm0, %xmm3 /* DP sin result is x-x*DP_SMALL */ - andps MO1(SP_ABS_MASK), %xmm7 /* SP |x| */ - cvtsd2ss %xmm3, %xmm0 /* sin(x) */ - movl ARG_SIN_PTR, %eax - movss MO1(SP_ONE), %xmm1 /* SP 1.0 */ - movss %xmm0, (%eax) /* sin(x) store */ - movl ARG_COS_PTR, %ecx - subss %xmm7, %xmm1 /* cos(x) */ - movss %xmm1, (%ecx) /* cos(x) store */ - RETURN - - .p2align 4 -L(arg_zero): - movss MO1(SP_ONE), %xmm0 /* 1.0 */ - movl ARG_SIN_PTR, %eax - movl ARG_COS_PTR, %ecx - movss %xmm7, (%eax) /* sin(+-0)==x */ - movss %xmm0, (%ecx) /* cos(+-0)==1 */ - RETURN - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN; flags are still those of cmpl $0x7f800000 in L(large_args) */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Here if x is Inf. 
Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - movl $EDOM, (%eax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. */ - movss ARG_X, %xmm7 /* SP x, loaded after the call: %xmm7 is call-clobbered */ - subss %xmm7, %xmm7 /* x-x, result is NaN */ - movl ARG_SIN_PTR, %eax - movl ARG_COS_PTR, %ecx - movss %xmm7, (%eax) - movss %xmm7, (%ecx) - RETURN -END(__sincosf_sse2) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - .long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomials for */ -/* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low DP part, */ -/* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */ -/* for |x|<2^-5. 
*/ - .p2align 4 -L(DP_SINCOS2_0): - .long 0x5543d49d,0xbfc55555 /* sin coefficient (low DP part) */ - .long 0xff5cc6fd,0xbfdfffff /* cos coefficient (high DP part) */ - .type L(DP_SINCOS2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0)) - - .p2align 4 -L(DP_SINCOS2_1): - .long 0x75cec8c5,0x3f8110f4 /* sin coefficient (low DP part) */ - .long 0xb178dac5,0x3fa55514 /* cos coefficient (high DP part) */ - .type L(DP_SINCOS2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1)) - - .p2align 3 -L(DP_ZERONE): - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* -1.0 (sign bit set, same encoding as DP_ONES[1]) */ - .type L(DP_ZERONE), @object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomials for */ -/* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low DP part, */ -/* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */ -/* for |t|<Pi/4. */ - .p2align 4 -L(DP_SC4): - .long 0x1674b58a,0xbe5a947e /* S4 */ - .long 0xdd8844d7,0xbe923c97 /* C4 */ - .type L(DP_SC4), @object - ASM_SIZE_DIRECTIVE(L(DP_SC4)) - - .p2align 4 -L(DP_SC3): - .long 0x64e6b5b4,0x3ec71d72 /* S3 */ - .long 0x9ac43cc0,0x3efa00eb /* C3 */ - .type L(DP_SC3), @object - ASM_SIZE_DIRECTIVE(L(DP_SC3)) - - .p2align 4 -L(DP_SC2): - .long 0x8b4bd1f9,0xbf2a019f /* S2 */ - .long 0x348b6874,0xbf56c16b /* C2 */ - .type L(DP_SC2), @object - ASM_SIZE_DIRECTIVE(L(DP_SC2)) - - .p2align 4 -L(DP_SC1): - .long 0x10c2688b,0x3f811111 /* S1 */ - .long 0x545c50c7,0x3fa55555 /* C1 */ - .type L(DP_SC1), @object - ASM_SIZE_DIRECTIVE(L(DP_SC1)) - - .p2align 4 -L(DP_SC0): - .long 0x55551cd9,0xbfc55555 /* S0 */ - .long 0xfffe98ae,0xbfdfffff /* C0 */ - .type L(DP_SC0), @object - ASM_SIZE_DIRECTIVE(L(DP_SC0)) - - .p2align 3 -L(DP_SMALL): - .long 0x00000000,0x3cd00000 /* 2^(-50) */ - .type L(DP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(DP_SMALL)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 
*/ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of -Pi/4 (stored negated: sign bit set) */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): - .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 (stored negated: sign bit set) */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - - .p2align 4 -L(SP_ABS_MASK): /* Mask for getting SP absolute value */ - .long 0x7fffffff,0x7fffffff - .long 0x7fffffff,0x7fffffff - .type L(SP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK)) - - .p2align 2 -L(SP_ONE): - .long 0x3f800000 /* 1.0 */ - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -weak_alias(__sincosf, sincosf) diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c deleted file mode 100644 index 9428f9b4ea..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Multiple versions of sincosf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern void __sincosf_sse2 (float, float *, float *); -extern void __sincosf_ia32 (float, float *, float *); -void __sincosf (float, float *, float *); - -libm_ifunc (__sincosf, - HAS_CPU_FEATURE (SSE2) ? __sincosf_sse2 : __sincosf_ia32); -weak_alias (__sincosf, sincosf); - -#define SINCOSF __sincosf_ia32 -#include <sysdeps/ieee754/flt-32/s_sincosf.c> diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S deleted file mode 100644 index ee96018061..0000000000 --- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S +++ /dev/null @@ -1,566 +0,0 @@ -/* Optimized with sse2 version of sinf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x| == 0: return x. 
- * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed. - * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1. - * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, - * t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * s = sign(x) * (-1.0)^((n>>2)&1) - * if(n&2 != 0) { - * using cos(t) polynomial for |t|<Pi/4, result is - * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))). - * } else { - * using sin(t) polynomial for |t|<Pi/4, result is - * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))). - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, - * t=|x|-j*Pi/4. - * 6.2) Reconstruction same as (5.2). - * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, - * t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. - * - * Special cases: - * sin(+-0) = +-0 not raising inexact/underflow, - * sin(subnormal) raises inexact/underflow, - * sin(min_normalized) raises inexact/underflow, - * sin(normalized) raises inexact, - * sin(Inf) = NaN, raises invalid, sets errno to EDOM, - * sin(NaN) = NaN. 
- */ - -#ifdef PIC -# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) -# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) -# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) -# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) -# define PUSH(REG) pushl REG; CFI_PUSH(REG) -# define POP(REG) popl REG; CFI_POP(REG) -# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) -# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) -# define ARG_X 8(%esp) -#else -# define MO1(symbol) L(symbol) -# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) -# define ENTRANCE -# define RETURN ret -# define ARG_X 4(%esp) -#endif - - .text -ENTRY(__sinf_sse2) - /* Input: single precision x on stack at address ARG_X */ - - ENTRANCE - movl ARG_X, %eax /* Bits of x */ - cvtss2sd ARG_X, %xmm0 /* DP x */ - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - movd %eax, %xmm3 /* SP |x| */ - andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ - movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - movl ARG_X, %ecx /* Load x */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - shrl $31, %ecx /* sign of x */ - addl $1, %eax /* k+1 */ - movl $0x0e, %edx - andl %eax, %edx /* j = (k+1)&0x0e */ - subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */ - testl $2, %eax /* n&2 != 0? 
*/ - jz L(sin_poly) - -/*L(cos_poly):*/ - /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))) - */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_C4), %xmm4 /* C4 */ - mulsd %xmm0, %xmm4 /* z*C4 */ - xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ - movsd MO1(DP_C3), %xmm3 /* C3 */ - mulsd %xmm0, %xmm3 /* z*C3 */ - addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */ - mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */ - mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */ - addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */ - - addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd MO1(DP_ONES), %xmm3 - - mulsd MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */ - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. 
*/ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(sin_poly): - /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))) - */ - - movaps %xmm0, %xmm4 /* t */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd MO1(DP_S4), %xmm2 /* S4 */ - mulsd %xmm0, %xmm2 /* z*S4 */ - movsd MO1(DP_S3), %xmm3 /* S3 */ - mulsd %xmm0, %xmm3 /* z*S3 */ - lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ - addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */ - mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */ - addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */ - mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */ - addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ - /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */ - mulsd MO2(DP_ONES,%ecx,8), %xmm4 - addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulsd %xmm4, %xmm3 - /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - movsd %xmm3, 0(%esp) /* Move result from sse... */ - fldl 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 8(%esp), %esp - RETURN - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23? 
*/ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movl ARG_X, %ecx /* Load x */ - movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - shrl $31, %ecx /* sign bit of x */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */ - subl $68, %eax - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - /* clear unneeded remainder from %ah */ - andl $0xff, %eax - - imull $28, %eax, %ecx /* j*28 */ - movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */ - mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19? 
*/ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd MO1(DP_2POW52), %xmm6 - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+MO1(DP_2POW52), %xmm4 - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - movl ARG_X, %ecx /* Load x */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - shrl $31, %ecx /* sign of x */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $1, %eax /* n=k+1 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */ - - jmp L(reconstruction) /* end of very_large_args peth */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5? 
*/ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - movaps %xmm0, %xmm3 /* x */ - mulsd %xmm0, %xmm0 /* y=x^2 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=x^4 */ - movsd MO1(DP_S4), %xmm4 /* S4 */ - mulsd %xmm0, %xmm4 /* z*S4 */ - movsd MO1(DP_S3), %xmm5 /* S3 */ - mulsd %xmm0, %xmm5 /* z*S3 */ - addsd MO1(DP_S2), %xmm4 /* S2+z*S4 */ - mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */ - addsd MO1(DP_S1), %xmm5 /* S1+z*S3 */ - mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */ - addsd MO1(DP_S0), %xmm4 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */ - mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */ - mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */ - /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm5, %xmm4 - /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - cvtsd2ss %xmm3, %xmm3 /* SP result */ - -L(epilogue): - lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ - movss %xmm3, 0(%esp) /* Move result from sse... */ - flds 0(%esp) /* ...to FPU. */ - /* Return back 4 bytes of stack frame */ - lea 4(%esp), %esp - RETURN - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27? */ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - movaps %xmm0, %xmm1 /* DP x */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - movsd MO1(DP_SIN2_1), %xmm3 /* DP DP_SIN2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */ - addsd MO1(DP_SIN2_0), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */ - mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - cvtsd2ss %xmm3, %xmm3 /* SP result */ - jmp L(epilogue) - - .p2align 4 -L(arg_less_2pn27): - movss ARG_X, %xmm3 /* SP x */ - cmpl $0, %eax /* x=0? 
*/ - je L(epilogue) /* in case x=0 return sin(+-0)==+-0 */ - /* Here if |x|<2^-27 */ - /* - * Special cases here: - * sin(subnormal) raises inexact/underflow - * sin(min_normalized) raises inexact/underflow - * sin(normalized) raises inexact - */ - movaps %xmm0, %xmm3 /* Copy of DP x */ - mulsd MO1(DP_SMALL), %xmm0 /* x*DP_SMALL */ - subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */ - cvtsd2ss %xmm3, %xmm3 /* Result converted to SP */ - jmp L(epilogue) - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Here if x is Inf. Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - movl $EDOM, (%eax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. */ - movss ARG_X, %xmm3 /* load x */ - subss %xmm3, %xmm3 /* Result is NaN */ - jmp L(epilogue) -END(__sinf_sse2) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - 
.long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomial - for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */ - .p2align 3 -L(DP_SIN2_0): - .long 0x5543d49d,0xbfc55555 - .type L(DP_SIN2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_SIN2_0)) - - .p2align 3 -L(DP_SIN2_1): - .long 0x75cec8c5,0x3f8110f4 - .type L(DP_SIN2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_SIN2_1)) - - .p2align 3 -L(DP_ZERONE): /* Indexed by k&1 when reducing very large arguments */ - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* -1.0 (sign bit set; same bit pattern as DP_ONES[1]) */ - .type L(DP_ZERONE), @object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): /* Indexed by the sign selector: entry 0 is +1.0, entry 1 is -1.0 */ - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomial - for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */ - .p2align 3 -L(DP_S3): - .long 0x64e6b5b4,0x3ec71d72 - .type L(DP_S3), @object - ASM_SIZE_DIRECTIVE(L(DP_S3)) - - .p2align 3 -L(DP_S1): - .long 0x10c2688b,0x3f811111 - .type L(DP_S1), @object - ASM_SIZE_DIRECTIVE(L(DP_S1)) - - .p2align 3 -L(DP_S4): - .long 0x1674b58a,0xbe5a947e - .type L(DP_S4), @object - ASM_SIZE_DIRECTIVE(L(DP_S4)) - - .p2align 3 -L(DP_S2): - .long 0x8b4bd1f9,0xbf2a019f - .type L(DP_S2), @object - ASM_SIZE_DIRECTIVE(L(DP_S2)) - - .p2align 3 -L(DP_S0): - .long 0x55551cd9,0xbfc55555 - .type L(DP_S0), @object - ASM_SIZE_DIRECTIVE(L(DP_S0)) - - .p2align 3 -L(DP_SMALL): - .long 0x00000000,0x3cd00000 /* 2^(-50) */ - .type L(DP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(DP_SMALL)) - -/* Coefficients of polynomial - for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. 
*/ - .p2align 3 -L(DP_C3): - .long 0x9ac43cc0,0x3efa00eb - .type L(DP_C3), @object - ASM_SIZE_DIRECTIVE(L(DP_C3)) - - .p2align 3 -L(DP_C1): - .long 0x545c50c7,0x3fa55555 - .type L(DP_C1), @object - ASM_SIZE_DIRECTIVE(L(DP_C1)) - - .p2align 3 -L(DP_C4): - .long 0xdd8844d7,0xbe923c97 - .type L(DP_C4), @object - ASM_SIZE_DIRECTIVE(L(DP_C4)) - - .p2align 3 -L(DP_C2): - .long 0x348b6874,0xbf56c16b - .type L(DP_C2), @object - ASM_SIZE_DIRECTIVE(L(DP_C2)) - - .p2align 3 -L(DP_C0): - .long 0xfffe98ae,0xbfdfffff - .type L(DP_C0), @object - ASM_SIZE_DIRECTIVE(L(DP_C0)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 */ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of -Pi/4 (stored negated, sign bit set) */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): - .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 (stored negated, sign bit set) */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - -weak_alias (__sinf, sinf) diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c deleted file mode 100644 index 8ccdd2f34d..0000000000 
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c +++ /dev/null @@ -1,28 +0,0 @@ -/* Multiple versions of sinf - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <init-arch.h> - -extern float __sinf_sse2 (float); -extern float __sinf_ia32 (float); -float __sinf (float); - -libm_ifunc (__sinf, HAS_CPU_FEATURE (SSE2) ? __sinf_sse2 : __sinf_ia32); /* Pick __sinf_sse2 when the CPU reports SSE2, otherwise __sinf_ia32. */ -weak_alias (__sinf, sinf); -#define SINF __sinf_ia32 /* Presumably the name under which the generic file included below defines its function -- confirm in flt-32/s_sinf.c. */ -#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/sysdeps/i386/i686/fpu/s_fmax.S b/sysdeps/i386/i686/fpu/s_fmax.S deleted file mode 100644 index ace8db9410..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmax.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmax) // 64-bit operands: x at 4(%esp), y at 12(%esp); result is left in %st - fldl 4(%esp) // x - fldl 12(%esp) // x : y - - fucomi %st(0), %st // y compared with itself: unordered only if y is a NaN - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fxch // %st = x, %st(1) = y (or x if y was a NaN) - - fucomi %st(1), %st // compare x with the other operand - fcmovb %st(1), %st // CF set (x below it, or unordered): take the other operand - - fstp %st(1) // pop the spare slot; the result stays in %st - - ret -END(__fmax) -weak_alias (__fmax, fmax) diff --git a/sysdeps/i386/i686/fpu/s_fmaxf.S b/sysdeps/i386/i686/fpu/s_fmaxf.S deleted file mode 100644 index 3a25951a09..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmaxf.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - - .text -ENTRY(__fmaxf) // 32-bit operands: x at 4(%esp), y at 8(%esp); result is left in %st - flds 4(%esp) // x - flds 8(%esp) // x : y - - fucomi %st(0), %st // y compared with itself: unordered only if y is a NaN - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fxch // %st = x, %st(1) = y (or x if y was a NaN) - - fucomi %st(1), %st // compare x with the other operand - fcmovb %st(1), %st // CF set (x below it, or unordered): take the other operand - - fstp %st(1) // pop the spare slot; the result stays in %st - - ret -END(__fmaxf) -weak_alias (__fmaxf, fmaxf) diff --git a/sysdeps/i386/i686/fpu/s_fmaxl.S b/sysdeps/i386/i686/fpu/s_fmaxl.S deleted file mode 100644 index 3f6c21c63d..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmaxl.S +++ /dev/null @@ -1,58 +0,0 @@ -/* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmaxl) // 80-bit operands: x at 4(%esp), y at 16(%esp); result is left in %st - fldt 4(%esp) // x - fldt 16(%esp) // x : y - - fucomi %st(1), %st // compare y (%st) with x (%st(1)) - jp 2f // PF set: at least one operand is a NaN - fcmovb %st(1), %st // y below x: take x - - fstp %st(1) // pop the spare slot; the result stays in %st - - ret - -2: // Unordered. - fucomi %st(0), %st // is y itself a NaN? - jp 3f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 11(%esp) // top significand byte of x; 0x40 is bit 62, the x87 quiet-NaN bit - jz 4f // bit clear: x is signaling - fstp %st(1) // x is a quiet NaN: return y - ret - -3: // st(0) is a NaN; st(1) may or may not be. - fxch // %st = x, %st(1) = y - fucomi %st(0), %st // is x a NaN as well? - jp 4f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. 
- testb $0x40, 23(%esp) // top significand byte of y; 0x40 is bit 62, the x87 quiet-NaN bit - jz 4f // bit clear: y is signaling - fstp %st(1) // y is a quiet NaN: return x - ret - -4: // Both arguments are NaNs, or one is a signaling NaN. - faddp // return the sum of the two operands, which is a NaN here - ret -END(__fmaxl) -weak_alias (__fmaxl, fmaxl) diff --git a/sysdeps/i386/i686/fpu/s_fmin.S b/sysdeps/i386/i686/fpu/s_fmin.S deleted file mode 100644 index 72d306fd79..0000000000 --- a/sysdeps/i386/i686/fpu/s_fmin.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fmin) // 64-bit operands: x at 4(%esp), y at 12(%esp); result is left in %st - fldl 4(%esp) // x - fldl 12(%esp) // x : y - - fucomi %st(0), %st // y compared with itself: unordered only if y is a NaN - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fucomi %st(1), %st // compare %st with x - fcmovnb %st(1), %st // CF clear (ordered and %st not below x): take x - - fstp %st(1) // pop the spare slot; the result stays in %st - - ret -END(__fmin) -weak_alias (__fmin, fmin) diff --git a/sysdeps/i386/i686/fpu/s_fminf.S b/sysdeps/i386/i686/fpu/s_fminf.S deleted file mode 100644 index 52ea892bad..0000000000 --- a/sysdeps/i386/i686/fpu/s_fminf.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fminf) // 32-bit operands: x at 4(%esp), y at 8(%esp); result is left in %st - flds 4(%esp) // x - flds 8(%esp) // x : y - - fucomi %st(0), %st // y compared with itself: unordered only if y is a NaN - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fucomi %st(1), %st // compare %st with x - fcmovnb %st(1), %st // CF clear (ordered and %st not below x): take x - - fstp %st(1) // pop the spare slot; the result stays in %st - - ret -END(__fminf) -weak_alias (__fminf, fminf) diff --git a/sysdeps/i386/i686/fpu/s_fminl.S b/sysdeps/i386/i686/fpu/s_fminl.S deleted file mode 100644 index e1cb83fed7..0000000000 --- a/sysdeps/i386/i686/fpu/s_fminl.S +++ /dev/null @@ -1,58 +0,0 @@ -/* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - - .text -ENTRY(__fminl) // 80-bit operands: x at 4(%esp), y at 16(%esp); result is left in %st - fldt 4(%esp) // x - fldt 16(%esp) // x : y - - fucomi %st(1), %st // compare y (%st) with x (%st(1)) - jp 2f // PF set: at least one operand is a NaN - fcmovnb %st(1), %st // y not below x: take x - - fstp %st(1) // pop the spare slot; the result stays in %st - - ret - -2: // Unordered. - fucomi %st(0), %st // is y itself a NaN? - jp 3f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 11(%esp) // top significand byte of x; 0x40 is bit 62, the x87 quiet-NaN bit - jz 4f // bit clear: x is signaling - fstp %st(1) // x is a quiet NaN: return y - ret - -3: // st(0) is a NaN; st(1) may or may not be. - fxch // %st = x, %st(1) = y - fucomi %st(0), %st // is x a NaN as well? - jp 4f - // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. - testb $0x40, 23(%esp) // top significand byte of y; 0x40 is bit 62, the x87 quiet-NaN bit - jz 4f // bit clear: y is signaling - fstp %st(1) // y is a quiet NaN: return x - ret - -4: // Both arguments are NaNs, or one is a signaling NaN. - faddp // return the sum of the two operands, which is a NaN here - ret -END(__fminl) -weak_alias (__fminl, fminl) diff --git a/sysdeps/i386/i686/hp-timing.h b/sysdeps/i386/i686/hp-timing.h deleted file mode 100644 index 1b11410feb..0000000000 --- a/sysdeps/i386/i686/hp-timing.h +++ /dev/null @@ -1,42 +0,0 @@ -/* High precision, low overhead timing functions. i686 version. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#ifndef _HP_TIMING_H -#define _HP_TIMING_H 1 - -/* We always assume having the timestamp register. */ -#define HP_TIMING_AVAIL (1) -#define HP_SMALL_TIMING_AVAIL (1) - -/* We indeed have inlined functions. */ -#define HP_TIMING_INLINE (1) - -/* We use 64bit values for the times. */ -typedef unsigned long long int hp_timing_t; - -/* That's quite simple. Use the `rdtsc' instruction. Note that the value - might not be 100% accurate since there might be some more instructions - running in this moment. This could be changed by using a barrier like - 'cpuid' right before the `rdtsc' instruction. But we are not interested - in accurate clock cycles here so we don't do this. */ -#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var)) /* "=A" binds the 64-bit Var to the %edx:%eax pair on i386. */ - -#include <hp-timing-common.h> - -#endif /* hp-timing.h */ diff --git a/sysdeps/i386/i686/init-arch.h b/sysdeps/i386/i686/init-arch.h deleted file mode 100644 index f55f80efa0..0000000000 --- a/sysdeps/i386/i686/init-arch.h +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 2015-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define MINIMUM_ISA 686 /* Presumably consumed by the generic x86 header included next -- confirm there. */ -#include <sysdeps/x86/init-arch.h> diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S deleted file mode 100644 index 5140ee2145..0000000000 --- a/sysdeps/i386/i686/memcmp.S +++ /dev/null @@ -1,408 +0,0 @@ -/* Compare two memory blocks for differences in the first COUNT bytes. - Copyright (C) 2004-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* Preserve EBX. */ -#define BLK1 PARMS -#define BLK2 BLK1+4 -#define LEN BLK2+4 -#define ENTRANCE pushl %ebx; cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (ebx, 0) -#define RETURN popl %ebx; cfi_adjust_cfa_offset (-4); \ - cfi_restore (ebx); ret - -/* Load an entry in a jump table into EBX. TABLE is a jump table - with relative offsets. INDEX is a register that contains the index - into the jump table. */ -#define LOAD_JUMP_TABLE_ENTRY(TABLE, INDEX) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. 
*/ \ - addl (%ebx,INDEX,4), %ebx - - .text - ALIGN (4) -ENTRY (memcmp) - ENTRANCE - - movl BLK1(%esp), %eax - movl BLK2(%esp), %edx - movl LEN(%esp), %ecx - - cmpl $1, %ecx - jne L(not_1) - movzbl (%eax), %ecx /* LEN == 1 */ - cmpb (%edx), %cl /* CF set when the BLK1 byte is below the BLK2 byte */ - jne L(neq) -L(bye): - xorl %eax, %eax /* equal: return 0 */ - RETURN - - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) -L(neq): - sbbl %eax, %eax /* %eax = -CF */ - sbbl $-1, %eax /* -1 if CF was set, +1 otherwise */ - RETURN - - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) -L(not_1): - jl L(bye) /* LEN == 0 */ - - pushl %esi - cfi_adjust_cfa_offset (4) - movl %eax, %esi - cfi_rel_offset (esi, 0) - cmpl $32, %ecx; - jge L(32bytesormore) /* LEN >= 32 */ - - LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx) - addl %ecx, %edx /* point both blocks at their ends; ... */ - addl %ecx, %esi /* ... the tail code below indexes backwards */ - jmp *%ebx /* dispatch on the remaining length (0..31) */ - - ALIGN (4) -L(28bytes): - movl -28(%esi), %eax - movl -28(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(24bytes): - movl -24(%esi), %eax - movl -24(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(20bytes): - movl -20(%esi), %eax - movl -20(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(16bytes): - movl -16(%esi), %eax - movl -16(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(12bytes): - movl -12(%esi), %eax - movl -12(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(8bytes): - movl -8(%esi), %eax - movl -8(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(4bytes): - movl -4(%esi), %eax - movl -4(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(0bytes): - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) -L(29bytes): - movl -29(%esi), %eax - movl -29(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(25bytes): - movl -25(%esi), %eax - movl -25(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(21bytes): - movl -21(%esi), %eax - movl -21(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(17bytes): - movl -17(%esi), %eax - movl -17(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(13bytes): - 
movl -13(%esi), %eax - movl -13(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(9bytes): - movl -9(%esi), %eax - movl -9(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(5bytes): - movl -5(%esi), %eax - movl -5(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(1bytes): - movzbl -1(%esi), %eax - cmpb -1(%edx), %al - jne L(set) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) -L(30bytes): - movl -30(%esi), %eax - movl -30(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(26bytes): - movl -26(%esi), %eax - movl -26(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(22bytes): - movl -22(%esi), %eax - movl -22(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(18bytes): - movl -18(%esi), %eax - movl -18(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(14bytes): - movl -14(%esi), %eax - movl -14(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(10bytes): - movl -10(%esi), %eax - movl -10(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(6bytes): - movl -6(%esi), %eax - movl -6(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(2bytes): - movzwl -2(%esi), %eax - movzwl -2(%edx), %ecx - cmpb %cl, %al - jne L(set) - cmpl %ecx, %eax - jne L(set) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) -L(31bytes): - movl -31(%esi), %eax - movl -31(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(27bytes): - movl -27(%esi), %eax - movl -27(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(23bytes): - movl -23(%esi), %eax - movl -23(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(19bytes): - movl -19(%esi), %eax - movl -19(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(15bytes): - movl -15(%esi), %eax - movl -15(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(11bytes): - movl -11(%esi), %eax - movl -11(%edx), 
%ecx - cmpl %ecx, %eax - jne L(find_diff) -L(7bytes): - movl -7(%esi), %eax - movl -7(%edx), %ecx - cmpl %ecx, %eax - jne L(find_diff) -L(3bytes): - movzwl -3(%esi), %eax - movzwl -3(%edx), %ecx - cmpb %cl, %al - jne L(set) - cmpl %ecx, %eax - jne L(set) - movzbl -1(%esi), %eax - cmpb -1(%edx), %al - jne L(set) - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - xorl %eax, %eax - RETURN - - cfi_adjust_cfa_offset (8) - cfi_rel_offset (esi, 0) - cfi_rel_offset (ebx, 4) - ALIGN (4) -/* ECX >= 32. */ -L(32bytesormore): - subl $32, %ecx - - movl (%esi), %eax - cmpl (%edx), %eax - jne L(load_ecx) - - movl 4(%esi), %eax - cmpl 4(%edx), %eax - jne L(load_ecx_4) - - movl 8(%esi), %eax - cmpl 8(%edx), %eax - jne L(load_ecx_8) - - movl 12(%esi), %eax - cmpl 12(%edx), %eax - jne L(load_ecx_12) - - movl 16(%esi), %eax - cmpl 16(%edx), %eax - jne L(load_ecx_16) - - movl 20(%esi), %eax - cmpl 20(%edx), %eax - jne L(load_ecx_20) - - movl 24(%esi), %eax - cmpl 24(%edx), %eax - jne L(load_ecx_24) - - movl 28(%esi), %eax - cmpl 28(%edx), %eax - jne L(load_ecx_28) - - addl $32, %esi - addl $32, %edx - cmpl $32, %ecx - jge L(32bytesormore) - - LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx) - addl %ecx, %edx - addl %ecx, %esi - jmp *%ebx /* fewer than 32 bytes left: dispatch on the remainder */ - -L(load_ecx_28): /* fall-through chain: each label adds 4 so %edx ends at the mismatching dword */ - addl $0x4, %edx -L(load_ecx_24): - addl $0x4, %edx -L(load_ecx_20): - addl $0x4, %edx -L(load_ecx_16): - addl $0x4, %edx -L(load_ecx_12): - addl $0x4, %edx -L(load_ecx_8): - addl $0x4, %edx -L(load_ecx_4): - addl $0x4, %edx -L(load_ecx): - movl (%edx), %ecx /* BLK2 dword to pair with the BLK1 dword already in %eax */ - -L(find_diff): /* %eax and %ecx hold the BLK1 and BLK2 dwords; test bytes from the lowest address up */ - cmpb %cl, %al - jne L(set) - cmpb %ch, %ah - jne L(set) - shrl $16,%eax - shrl $16,%ecx - cmpb %cl, %al - jne L(set) - /* We get there only if we already know there is a - difference. 
*/ - cmpl %ecx, %eax -L(set): - sbbl %eax, %eax /* %eax = -CF */ - sbbl $-1, %eax /* -1 if the BLK1 byte was below, +1 otherwise */ - popl %esi - cfi_adjust_cfa_offset (-4) - cfi_restore (esi) - RETURN -END (memcmp) - - .section .rodata - ALIGN (2) -L(table_32bytes) : - .long L(0bytes) - L(table_32bytes) - .long L(1bytes) - L(table_32bytes) - .long L(2bytes) - L(table_32bytes) - .long L(3bytes) - L(table_32bytes) - .long L(4bytes) - L(table_32bytes) - .long L(5bytes) - L(table_32bytes) - .long L(6bytes) - L(table_32bytes) - .long L(7bytes) - L(table_32bytes) - .long L(8bytes) - L(table_32bytes) - .long L(9bytes) - L(table_32bytes) - .long L(10bytes) - L(table_32bytes) - .long L(11bytes) - L(table_32bytes) - .long L(12bytes) - L(table_32bytes) - .long L(13bytes) - L(table_32bytes) - .long L(14bytes) - L(table_32bytes) - .long L(15bytes) - L(table_32bytes) - .long L(16bytes) - L(table_32bytes) - .long L(17bytes) - L(table_32bytes) - .long L(18bytes) - L(table_32bytes) - .long L(19bytes) - L(table_32bytes) - .long L(20bytes) - L(table_32bytes) - .long L(21bytes) - L(table_32bytes) - .long L(22bytes) - L(table_32bytes) - .long L(23bytes) - L(table_32bytes) - .long L(24bytes) - L(table_32bytes) - .long L(25bytes) - L(table_32bytes) - .long L(26bytes) - L(table_32bytes) - .long L(27bytes) - L(table_32bytes) - .long L(28bytes) - L(table_32bytes) - .long L(29bytes) - L(table_32bytes) - .long L(30bytes) - L(table_32bytes) - .long L(31bytes) - L(table_32bytes) - - -#undef bcmp -weak_alias (memcmp, bcmp) -libc_hidden_builtin_def (memcmp) diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S deleted file mode 100644 index 1d61447430..0000000000 --- a/sysdeps/i386/i686/memcpy.S +++ /dev/null @@ -1,98 +0,0 @@ -/* Copy memory block and return pointer to beginning of destination block - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 -#define LEN SRC+4 - - .text -#if defined PIC && IS_IN (libc) -ENTRY_CHK (__memcpy_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__memcpy_chk) -#endif -ENTRY (memcpy) - - movl %edi, %eax - movl DEST(%esp), %edi - movl %esi, %edx - movl SRC(%esp), %esi - - movl %edi, %ecx - xorl %esi, %ecx - andl $3, %ecx - movl LEN(%esp), %ecx - cld - jne .Lunaligned - - cmpl $3, %ecx - jbe .Lunaligned - - testl $3, %esi - je 1f - movsb - decl %ecx - testl $3, %esi - je 1f - movsb - decl %ecx - testl $3, %esi - je 1f - movsb - decl %ecx -1: pushl %eax - movl %ecx, %eax - shrl $2, %ecx - andl $3, %eax - rep - movsl - movl %eax, %ecx - rep - movsb - popl %eax - -.Lend: movl %eax, %edi - movl %edx, %esi - movl DEST(%esp), %eax - - ret - - /* When we come here the pointers do not have the same - alignment or the length is too short. No need to optimize for - aligned memory accesses. 
*/ -.Lunaligned: - shrl $1, %ecx - jnc 1f - movsb -1: shrl $1, %ecx - jnc 2f - movsw -2: rep - movsl - jmp .Lend -END (memcpy) -libc_hidden_builtin_def (memcpy) diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S deleted file mode 100644 index f60c3d501b..0000000000 --- a/sysdeps/i386/i686/memmove.S +++ /dev/null @@ -1,120 +0,0 @@ -/* Copy memory block and return pointer to beginning of destination block - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 2003-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 2003. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* one spilled register */ -#define RTN PARMS - - .text - -#ifdef USE_AS_BCOPY -# define SRC RTN -# define DEST SRC+4 -# define LEN DEST+4 -#else -# define DEST RTN -# define SRC DEST+4 -# define LEN SRC+4 - -# if defined PIC && IS_IN (libc) -ENTRY_CHK (__memmove_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__memmove_chk) -# endif -#endif - -ENTRY (memmove) - - pushl %edi - cfi_adjust_cfa_offset (4) - - movl LEN(%esp), %ecx - movl DEST(%esp), %edi - cfi_rel_offset (edi, 0) - movl %esi, %edx - movl SRC(%esp), %esi - cfi_register (esi, edx) - - movl %edi, %eax - subl %esi, %eax - cmpl %eax, %ecx - ja 3f - - cld - shrl $1, %ecx - jnc 1f - movsb -1: shrl $1, %ecx - jnc 2f - movsw -2: rep - movsl - movl %edx, %esi - cfi_restore (esi) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -#endif - - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret - - cfi_adjust_cfa_offset (4) - cfi_rel_offset (edi, 0) - cfi_register (esi, edx) - - /* Backward copying. */ -3: std - leal -1(%edi, %ecx), %edi - leal -1(%esi, %ecx), %esi - shrl $1, %ecx - jnc 1f - movsb -1: subl $1, %edi - subl $1, %esi - shrl $1, %ecx - jnc 2f - movsw -2: subl $2, %edi - subl $2, %esi - rep - movsl - movl %edx, %esi - cfi_restore (esi) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -#endif - - cld - popl %edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (memmove) -#ifndef USE_AS_BCOPY -libc_hidden_builtin_def (memmove) -#endif diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S deleted file mode 100644 index 31cb4efdb2..0000000000 --- a/sysdeps/i386/i686/mempcpy.S +++ /dev/null @@ -1,65 +0,0 @@ -/* Copy memory block and return pointer to following byte. - For Intel 80x86, x>=6. - This file is part of the GNU C Library. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define RTN PARMS -#define DEST RTN -#define SRC DEST+4 -#define LEN SRC+4 - - .text -#if defined PIC && IS_IN (libc) -ENTRY_CHK (__mempcpy_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__mempcpy_chk) -#endif -ENTRY (__mempcpy) - - movl LEN(%esp), %ecx - movl %edi, %eax - cfi_register (edi, eax) - movl DEST(%esp), %edi - movl %esi, %edx - cfi_register (esi, edx) - movl SRC(%esp), %esi - cld - shrl $1, %ecx - jnc 1f - movsb -1: shrl $1, %ecx - jnc 2f - movsw -2: rep - movsl - xchgl %edi, %eax - cfi_restore (edi) - movl %edx, %esi - cfi_restore (esi) - - ret -END (__mempcpy) -libc_hidden_def (__mempcpy) -weak_alias (__mempcpy, mempcpy) -libc_hidden_builtin_def (mempcpy) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S deleted file mode 100644 index 24d06178d2..0000000000 --- a/sysdeps/i386/i686/memset.S +++ /dev/null @@ -1,100 +0,0 @@ -/* memset/bzero -- set memory area to CH/0 - Highly optimized version for ix86, x>=6. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4+4 /* space for 1 saved reg */ -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -#else -# define RTN PARMS -# define DEST RTN -# define CHR DEST+4 -# define LEN CHR+4 -#endif - - .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO -ENTRY_CHK (__memset_chk) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END_CHK (__memset_chk) -#endif -ENTRY (memset) - - cld - pushl %edi - cfi_adjust_cfa_offset (4) - movl DEST(%esp), %edx - movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xorl %eax, %eax /* fill with 0 */ -#else - movzbl CHR(%esp), %eax -#endif - jecxz 1f - movl %edx, %edi - cfi_rel_offset (edi, 0) - andl $3, %edx - jz 2f /* aligned */ - jp 3f /* misaligned at 3, store just one byte below */ - stosb /* misaligned at 1 or 2, store two bytes */ - decl %ecx - jz 1f -3: stosb - decl %ecx - jz 1f - xorl $1, %edx - jnz 2f /* was misaligned at 2 or 3, now aligned */ - stosb /* was misaligned at 1, store third byte */ - decl %ecx -2: movl %ecx, %edx - shrl $2, %ecx - andl $3, %edx -#ifndef USE_AS_BZERO - imul $0x01010101, %eax -#endif - rep - stosl - movl %edx, %ecx - rep - stosb - -1: -#ifndef USE_AS_BZERO - movl DEST(%esp), %eax /* start address of destination is result */ -#endif - popl 
%edi - cfi_adjust_cfa_offset (-4) - cfi_restore (edi) - - ret -END (memset) -libc_hidden_builtin_def (memset) - -#if defined SHARED && IS_IN (libc) && !defined __memset_chk \ - && !defined USE_AS_BZERO -strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -#endif diff --git a/sysdeps/i386/i686/memusage.h b/sysdeps/i386/i686/memusage.h deleted file mode 100644 index 77a020d7c0..0000000000 --- a/sysdeps/i386/i686/memusage.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (C) 2000-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define GETSP() ({ register uintptr_t stack_ptr asm ("esp"); stack_ptr; }) -#define GETTIME(low,high) asm ("rdtsc" : "=a" (low), "=d" (high)) - -#include <sysdeps/generic/memusage.h> diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile deleted file mode 100644 index 4a0c20c051..0000000000 --- a/sysdeps/i386/i686/multiarch/Makefile +++ /dev/null @@ -1,44 +0,0 @@ -ifeq ($(subdir),csu) -tests += test-multiarch -endif - -ifeq ($(subdir),string) -gen-as-const-headers += locale-defines.sym -sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ - memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ - memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ - memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \ - strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \ - memcmp-ssse3 memcmp-sse4 varshift \ - strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ - strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \ - strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \ - strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \ - strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \ - memchr-sse2 memchr-sse2-bsf \ - memrchr-sse2 memrchr-sse2-bsf memrchr-c \ - rawmemchr-sse2 rawmemchr-sse2-bsf \ - strnlen-sse2 strnlen-c \ - strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \ - strncase_l-c strncase-c strncase_l-ssse3 \ - strcasecmp_l-sse4 strncase_l-sse4 \ - bcopy-sse2-unaligned memcpy-sse2-unaligned \ - mempcpy-sse2-unaligned memmove-sse2-unaligned \ - strcspn-c strpbrk-c strspn-c -CFLAGS-varshift.c += -msse4 -CFLAGS-strcspn-c.c += -msse4 -CFLAGS-strpbrk-c.c += -msse4 -CFLAGS-strspn-c.c += -msse4 -endif - -ifeq ($(subdir),wcsmbs) -sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \ - wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcschr-sse2 \ - wcschr-c wcsrchr-sse2 wcsrchr-c wcscpy-ssse3 wcscpy-c -endif - -ifeq ($(subdir),math) -libm-sysdep_routines += s_fma-fma s_fmaf-fma -CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse -CFLAGS-s_fmaf-fma.c += 
-mavx -mfpmath=sse -endif diff --git a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S deleted file mode 100644 index efef2a10dd..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define USE_AS_BCOPY -#define MEMCPY __bcopy_sse2_unaligned -#include "memcpy-sse2-unaligned.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S deleted file mode 100644 index cbc8b420e8..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define USE_AS_BCOPY -#define MEMCPY __bcopy_ssse3_rep -#include "memcpy-ssse3-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S deleted file mode 100644 index 36aac44b9c..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define USE_AS_BCOPY -#define MEMCPY __bcopy_ssse3 -#include "memcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S deleted file mode 100644 index 877f82c28f..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy.S +++ /dev/null @@ -1,59 +0,0 @@ -/* Multiple versions of bcopy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(bcopy) - .type bcopy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__bcopy_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__bcopy_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__bcopy_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__bcopy_ssse3_rep) -2: ret -END(bcopy) - -# undef ENTRY -# define ENTRY(name) \ - .type __bcopy_ia32, @function; \ - .p2align 4; \ - .globl __bcopy_ia32; \ - .hidden __bcopy_ia32; \ - __bcopy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32 - -#endif - -#include "../bcopy.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S deleted file mode 100644 index 507b288bb3..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2_rep __bzero_sse2_rep -#include "memset-sse2-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S deleted file mode 100644 index 8d04512e4e..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2 __bzero_sse2 -#include "memset-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S deleted file mode 100644 index 9dac490aa2..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero.S +++ /dev/null @@ -1,62 +0,0 @@ -/* Multiple versions of bzero - All versions 
must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(__bzero) - .type __bzero, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__bzero_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX ( __bzero_sse2) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__bzero_sse2_rep) -2: ret -END(__bzero) - -# undef ENTRY -# define ENTRY(name) \ - .type __bzero_ia32, @function; \ - .p2align 4; \ - .globl __bzero_ia32; \ - .hidden __bzero_ia32; \ - __bzero_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __bzero_ia32, .-__bzero_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI___bzero; __GI___bzero = __bzero_ia32 -# endif -#endif - -#include "../bzero.S" diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c deleted file mode 100644 index e8026a2a78..0000000000 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ /dev/null @@ -1,376 +0,0 @@ -/* Enumerate available IFUNC implementations of a function. i686 version. - Copyright (C) 2012-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <assert.h> -#include <string.h> -#include <wchar.h> -#include <ifunc-impl-list.h> -#include "init-arch.h" - -/* Maximum number of IFUNC implementations. */ -#define MAX_IFUNC 4 - -/* Fill ARRAY of MAX elements with IFUNC implementations for function - NAME and return the number of valid entries. */ - -size_t -__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - size_t max) -{ - assert (max >= MAX_IFUNC); - - size_t i = 0; - - /* Support sysdeps/i386/i686/multiarch/bcopy.S. 
*/ - IFUNC_IMPL (i, name, bcopy, - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3), - __bcopy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3), - __bcopy_ssse3) - IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2), - __bcopy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/bzero.S. */ - IFUNC_IMPL (i, name, bzero, - IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2), - __bzero_sse2_rep) - IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2), - __bzero_sse2) - IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memchr.S. */ - IFUNC_IMPL (i, name, memchr, - IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2), - __memchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2), - __memchr_sse2) - IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memcmp.S. */ - IFUNC_IMPL (i, name, memcmp, - IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_2), - __memcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3), - __memcmp_ssse3) - IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */ - IFUNC_IMPL (i, name, __memmove_chk, - IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSSE3), - __memmove_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSSE3), - __memmove_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_CPU_FEATURE (SSE2), - __memmove_chk_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, - __memmove_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memmove.S. 
*/ - IFUNC_IMPL (i, name, memmove, - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), - __memmove_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), - __memmove_ssse3) - IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2), - __memmove_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memrchr.S. */ - IFUNC_IMPL (i, name, memrchr, - IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2), - __memrchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2), - __memrchr_sse2) - IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memset_chk.S. */ - IFUNC_IMPL (i, name, __memset_chk, - IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_CPU_FEATURE (SSE2), - __memset_chk_sse2_rep) - IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_CPU_FEATURE (SSE2), - __memset_chk_sse2) - IFUNC_IMPL_ADD (array, i, __memset_chk, 1, - __memset_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memset.S. */ - IFUNC_IMPL (i, name, memset, - IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2), - __memset_sse2_rep) - IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2), - __memset_sse2) - IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32)) - - /* Support sysdeps/i386/i686/multiarch/rawmemchr.S. */ - IFUNC_IMPL (i, name, rawmemchr, - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2), - __rawmemchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2), - __rawmemchr_sse2) - IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/stpncpy.S. */ - IFUNC_IMPL (i, name, stpncpy, - IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3), - __stpncpy_ssse3) - IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSE2), - __stpncpy_sse2) - IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/stpcpy.S. 
*/ - IFUNC_IMPL (i, name, stpcpy, - IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3), - __stpcpy_ssse3) - IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2), - __stpcpy_sse2) - IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcasecmp.S. */ - IFUNC_IMPL (i, name, strcasecmp, - IFUNC_IMPL_ADD (array, i, strcasecmp, - HAS_CPU_FEATURE (SSE4_2), - __strcasecmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strcasecmp, - HAS_CPU_FEATURE (SSSE3), - __strcasecmp_ssse3) - IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S. */ - IFUNC_IMPL (i, name, strcasecmp_l, - IFUNC_IMPL_ADD (array, i, strcasecmp_l, - HAS_CPU_FEATURE (SSE4_2), - __strcasecmp_l_sse4_2) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, - HAS_CPU_FEATURE (SSSE3), - __strcasecmp_l_ssse3) - IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1, - __strcasecmp_l_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcat.S. */ - IFUNC_IMPL (i, name, strcat, - IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), - __strcat_ssse3) - IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSE2), - __strcat_sse2) - IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strchr.S. */ - IFUNC_IMPL (i, name, strchr, - IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2), - __strchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2), - __strchr_sse2) - IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcmp.S. */ - IFUNC_IMPL (i, name, strcmp, - IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), - __strcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), - __strcmp_ssse3) - IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcpy.S. 
*/ - IFUNC_IMPL (i, name, strcpy, - IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3), - __strcpy_ssse3) - IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2), - __strcpy_sse2) - IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strcspn.S. */ - IFUNC_IMPL (i, name, strcspn, - IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2), - __strcspn_sse42) - IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncase.S. */ - IFUNC_IMPL (i, name, strncasecmp, - IFUNC_IMPL_ADD (array, i, strncasecmp, - HAS_CPU_FEATURE (SSE4_2), - __strncasecmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strncasecmp, - HAS_CPU_FEATURE (SSSE3), - __strncasecmp_ssse3) - IFUNC_IMPL_ADD (array, i, strncasecmp, 1, - __strncasecmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncase_l.S. */ - IFUNC_IMPL (i, name, strncasecmp_l, - IFUNC_IMPL_ADD (array, i, strncasecmp_l, - HAS_CPU_FEATURE (SSE4_2), - __strncasecmp_l_sse4_2) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, - HAS_CPU_FEATURE (SSSE3), - __strncasecmp_l_ssse3) - IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1, - __strncasecmp_l_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncat.S. */ - IFUNC_IMPL (i, name, strncat, - IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), - __strncat_ssse3) - IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSE2), - __strncat_sse2) - IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncpy.S. */ - IFUNC_IMPL (i, name, strncpy, - IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3), - __strncpy_ssse3) - IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSE2), - __strncpy_sse2) - IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strnlen.S. 
*/ - IFUNC_IMPL (i, name, strnlen, - IFUNC_IMPL_ADD (array, i, strnlen, HAS_CPU_FEATURE (SSE2), - __strnlen_sse2) - IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strpbrk.S. */ - IFUNC_IMPL (i, name, strpbrk, - IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2), - __strpbrk_sse42) - IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strrchr.S. */ - IFUNC_IMPL (i, name, strrchr, - IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), - __strrchr_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2), - __strrchr_sse2) - IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strspn.S. */ - IFUNC_IMPL (i, name, strspn, - IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2), - __strspn_sse42) - IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcschr.S. */ - IFUNC_IMPL (i, name, wcschr, - IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2), - __wcschr_sse2) - IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcscmp.S. */ - IFUNC_IMPL (i, name, wcscmp, - IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2), - __wcscmp_sse2) - IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcscpy.S. */ - IFUNC_IMPL (i, name, wcscpy, - IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3), - __wcscpy_ssse3) - IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcslen.S. */ - IFUNC_IMPL (i, name, wcslen, - IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2), - __wcslen_sse2) - IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. 
*/ - IFUNC_IMPL (i, name, wcsrchr, - IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2), - __wcsrchr_sse2) - IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32)) - - /* Support sysdeps/i386/i686/multiarch/wmemcmp.S. */ - IFUNC_IMPL (i, name, wmemcmp, - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2), - __wmemcmp_sse4_2) - IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3), - __wmemcmp_ssse3) - IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_ia32)) - -#ifdef SHARED - /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */ - IFUNC_IMPL (i, name, __memcpy_chk, - IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __memcpy_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __memcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_CPU_FEATURE (SSE2), - __memcpy_chk_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, - __memcpy_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/memcpy.S. */ - IFUNC_IMPL (i, name, memcpy, - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), - __memcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), - __memcpy_ssse3) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2), - __memcpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */ - IFUNC_IMPL (i, name, __mempcpy_chk, - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __mempcpy_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSSE3), - __mempcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_CPU_FEATURE (SSE2), - __mempcpy_chk_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, - __mempcpy_chk_ia32)) - - /* Support sysdeps/i386/i686/multiarch/mempcpy.S. 
*/ - IFUNC_IMPL (i, name, mempcpy, - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), - __mempcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), - __mempcpy_ssse3) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2), - __mempcpy_sse2_unaligned) - IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strlen.S. */ - IFUNC_IMPL (i, name, strlen, - IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2), - __strlen_sse2_bsf) - IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2), - __strlen_sse2) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32)) - - /* Support sysdeps/i386/i686/multiarch/strncmp.S. */ - IFUNC_IMPL (i, name, strncmp, - IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2), - __strncmp_sse4_2) - IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), - __strncmp_ssse3) - IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32)) -#endif - - return i; -} diff --git a/sysdeps/i386/i686/multiarch/locale-defines.sym b/sysdeps/i386/i686/multiarch/locale-defines.sym deleted file mode 100644 index aebff9a4f9..0000000000 --- a/sysdeps/i386/i686/multiarch/locale-defines.sym +++ /dev/null @@ -1,11 +0,0 @@ -#include <locale/localeinfo.h> -#include <langinfo.h> -#include <stddef.h> - --- - -LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales) -LC_CTYPE -_NL_CTYPE_NONASCII_CASE -LOCALE_DATA_VALUES offsetof (struct __locale_data, values) -SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0]) diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S deleted file mode 100644 index dd316486e6..0000000000 --- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S +++ /dev/null @@ -1,502 +0,0 @@ -/* Optimized memchr with sse2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - -# ifndef USE_AS_RAWMEMCHR -# define LEN STR2+4 -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); -# endif - -# ifndef MEMCHR -# define MEMCHR __memchr_sse2_bsf -# endif - - .text -ENTRY (MEMCHR) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - -# ifndef USE_AS_RAWMEMCHR - mov LEN(%esp), %edx - test %edx, %edx - jz L(return_null_1) -# endif - mov %ecx, %eax - - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - - and $63, %ecx - pshufd $0, %xmm1, %xmm1 - - cmp $48, %ecx - ja L(crosscache) - - movdqu (%eax), %xmm0 - pcmpeqb %xmm1, %xmm0 -/* Check if there is a match. */ - pmovmskb %xmm0, %ecx - test %ecx, %ecx - je L(unaligned_no_match_1) -/* Check which byte is a match. 
*/ - bsf %ecx, %ecx - -# ifndef USE_AS_RAWMEMCHR - sub %ecx, %edx - jbe L(return_null_1) -# endif - add %ecx, %eax - ret - - .p2align 4 -L(unaligned_no_match_1): -# ifndef USE_AS_RAWMEMCHR - sub $16, %edx - jbe L(return_null_1) - PUSH (%edi) - lea 16(%eax), %edi - and $15, %eax - and $-16, %edi - add %eax, %edx -# else - lea 16(%eax), %edx - and $-16, %edx -# endif - jmp L(loop_prolog) - - .p2align 4 -L(return_null_1): - xor %eax, %eax - ret - -# ifndef USE_AS_RAWMEMCHR - CFI_POP (%edi) -# endif - - .p2align 4 -L(crosscache): -/* Handle unaligned string. */ - -# ifndef USE_AS_RAWMEMCHR - PUSH (%edi) - mov %eax, %edi - and $15, %ecx - and $-16, %edi - movdqa (%edi), %xmm0 -# else - mov %eax, %edx - and $15, %ecx - and $-16, %edx - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 -/* Check if there is a match. */ - pmovmskb %xmm0, %eax -/* Remove the leading bytes. */ - sar %cl, %eax - test %eax, %eax - je L(unaligned_no_match) -/* Check which byte is a match. */ - bsf %eax, %eax - -# ifndef USE_AS_RAWMEMCHR - sub %eax, %edx - jbe L(return_null) - add %edi, %eax - add %ecx, %eax - RETURN -# else - add %edx, %eax - add %ecx, %eax - ret -# endif - - .p2align 4 -L(unaligned_no_match): -# ifndef USE_AS_RAWMEMCHR - /* Calculate the last acceptable address and check for possible - addition overflow by using satured math: - edx = ecx + edx - edx |= -(edx < ecx) */ - add %ecx, %edx - sbb %eax, %eax - or %eax, %edx - sub $16, %edx - jbe L(return_null) - add $16, %edi -# else - add $16, %edx -# endif - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop_prolog): -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 -# else - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm4 -# else - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm4 - -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches0) - -# ifndef USE_AS_RAWMEMCHR - test $0x3f, %edi -# else - test $0x3f, %edx -# endif - jz L(align64_loop) - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 -# else - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm3 -# else - movdqa 48(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - test %eax, %eax - jnz L(matches0) - -# ifndef USE_AS_RAWMEMCHR - mov %edi, %ecx - and $-64, %edi - and $63, %ecx - add %ecx, %edx -# else - and $-64, %edx -# endif - - .p2align 4 -L(align64_loop): -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - 
movdqa (%edi), %xmm0 - movdqa 16(%edi), %xmm2 - movdqa 32(%edi), %xmm3 - movdqa 48(%edi), %xmm4 -# else - movdqa (%edx), %xmm0 - movdqa 16(%edx), %xmm2 - movdqa 32(%edx), %xmm3 - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm0, %xmm3 - pmaxub %xmm2, %xmm4 - pmaxub %xmm3, %xmm4 - pmovmskb %xmm4, %eax - -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - - test %eax, %eax - jz L(align64_loop) - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edi -# else - sub $64, %edx -# endif - - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - - pcmpeqb %xmm1, %xmm3 - -# ifndef USE_AS_RAWMEMCHR - pcmpeqb 48(%edi), %xmm1 -# else - pcmpeqb 48(%edx), %xmm1 -# endif - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - - pmovmskb %xmm1, %eax - bsf %eax, %eax - -# ifndef USE_AS_RAWMEMCHR - lea 48(%edi, %eax), %eax - RETURN -# else - lea 48(%edx, %eax), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(exit_loop): - add $64, %edx - cmp $32, %edx - jbe L(exit_loop_32) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - - movdqa 16(%edi), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - - movdqa 32(%edi), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32_1) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb 48(%edi), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches48_1) - xor %eax, %eax - RETURN - - .p2align 4 -L(exit_loop_32): - movdqa (%edi), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches_1) - cmp $16, %edx - jbe L(return_null) - - pcmpeqb 16(%edi), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz 
L(matches16_1) - xor %eax, %eax - RETURN -# endif - .p2align 4 -L(matches0): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - lea -16(%eax, %edi), %eax - RETURN -# else - lea -16(%eax, %edx), %eax - ret -# endif - - .p2align 4 -L(matches): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - add %edi, %eax - RETURN -# else - add %edx, %eax - ret -# endif - - .p2align 4 -L(matches16): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - lea 16(%eax, %edi), %eax - RETURN -# else - lea 16(%eax, %edx), %eax - ret -# endif - - .p2align 4 -L(matches32): - bsf %eax, %eax -# ifndef USE_AS_RAWMEMCHR - lea 32(%eax, %edi), %eax - RETURN -# else - lea 32(%eax, %edx), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(matches_1): - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - add %edi, %eax - RETURN - - .p2align 4 -L(matches16_1): - sub $16, %edx - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - lea 16(%edi, %eax), %eax - RETURN - - .p2align 4 -L(matches32_1): - sub $32, %edx - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - lea 32(%edi, %eax), %eax - RETURN - - .p2align 4 -L(matches48_1): - sub $48, %edx - bsf %eax, %eax - sub %eax, %edx - jbe L(return_null) - - lea 48(%edi, %eax), %eax - RETURN -# endif - .p2align 4 -L(return_null): - xor %eax, %eax -# ifndef USE_AS_RAWMEMCHR - RETURN -# else - ret -# endif - -END (MEMCHR) -#endif diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S deleted file mode 100644 index 172d70de13..0000000000 --- a/sysdeps/i386/i686/multiarch/memchr-sse2.S +++ /dev/null @@ -1,709 +0,0 @@ -/* Optimized memchr with sse2 without bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef USE_AS_RAWMEMCHR -# define ENTRANCE PUSH(%edi); -# define PARMS 8 -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); -# else -# define ENTRANCE -# define PARMS 4 -# endif - -# define STR1 PARMS -# define STR2 STR1+4 - -# ifndef USE_AS_RAWMEMCHR -# define LEN STR2+4 -# endif - -# ifndef MEMCHR -# define MEMCHR __memchr_sse2 -# endif - - atom_text_section -ENTRY (MEMCHR) - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 -# ifndef USE_AS_RAWMEMCHR - mov LEN(%esp), %edx - test %edx, %edx - jz L(return_null) -# endif - - punpcklbw %xmm1, %xmm1 -# ifndef USE_AS_RAWMEMCHR - mov %ecx, %edi -# else - mov %ecx, %edx -# endif - punpcklbw %xmm1, %xmm1 - - and $63, %ecx - pshufd $0, %xmm1, %xmm1 - cmp $48, %ecx - ja L(crosscache) - -# ifndef USE_AS_RAWMEMCHR - movdqu (%edi), %xmm0 -# else - movdqu (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax -# ifndef USE_AS_RAWMEMCHR - jnz L(match_case2_prolog) - - sub $16, %edx - jbe L(return_null) - lea 16(%edi), %edi - and 
$15, %ecx - and $-16, %edi - add %ecx, %edx -# else - jnz L(match_case1_prolog) - lea 16(%edx), %edx - and $-16, %edx -# endif - jmp L(loop_prolog) - - .p2align 4 -L(crosscache): - and $15, %ecx -# ifndef USE_AS_RAWMEMCHR - and $-16, %edi - movdqa (%edi), %xmm0 -# else - and $-16, %edx - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - sar %cl, %eax - test %eax, %eax - -# ifndef USE_AS_RAWMEMCHR - jnz L(match_case2_prolog1) - /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using - "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to void - possible addition overflow. */ - neg %ecx - add $16, %ecx - sub %ecx, %edx - jbe L(return_null) - lea 16(%edi), %edi -# else - jnz L(match_case1_prolog1) - lea 16(%edx), %edx -# endif - - .p2align 4 -L(loop_prolog): -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 -# else - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - xor %ecx, %ecx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif - pcmpeqb %xmm1, %xmm2 - lea 16(%ecx), %ecx - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - lea 16(%ecx), %ecx - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm4 -# else - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm4 - lea 16(%ecx), %ecx - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - lea 64(%edi), %edi - sub $64, %edx - jbe L(exit_loop) - - movdqa (%edi), %xmm0 -# else - lea 64(%edx), %edx - movdqa (%edx), %xmm0 -# endif - pcmpeqb %xmm1, %xmm0 - xor %ecx, %ecx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 16(%edi), %xmm2 -# else - movdqa 16(%edx), %xmm2 -# endif 
- pcmpeqb %xmm1, %xmm2 - lea 16(%ecx), %ecx - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - lea 16(%ecx), %ecx - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 48(%edi), %xmm4 -# else - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm4 - lea 16(%ecx), %ecx - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - lea 64(%edi), %edi - mov %edi, %ecx - and $-64, %edi - and $63, %ecx - add %ecx, %edx -# else - lea 64(%edx), %edx - and $-64, %edx -# endif - - .p2align 4 -L(align64_loop): - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edx - jbe L(exit_loop) - movdqa (%edi), %xmm0 - movdqa 16(%edi), %xmm2 - movdqa 32(%edi), %xmm3 - movdqa 48(%edi), %xmm4 -# else - movdqa (%edx), %xmm0 - movdqa 16(%edx), %xmm2 - movdqa 32(%edx), %xmm3 - movdqa 48(%edx), %xmm4 -# endif - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm0, %xmm3 - pmaxub %xmm2, %xmm4 - pmaxub %xmm3, %xmm4 -# ifndef USE_AS_RAWMEMCHR - add $64, %edi -# else - add $64, %edx -# endif - pmovmskb %xmm4, %eax - - test %eax, %eax - jz L(align64_loop) - -# ifndef USE_AS_RAWMEMCHR - sub $64, %edi -# else - sub $64, %edx -# endif - - pmovmskb %xmm0, %eax - xor %ecx, %ecx - test %eax, %eax - jnz L(match_case1) - - pmovmskb %xmm2, %eax - lea 16(%ecx), %ecx - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - movdqa 32(%edi), %xmm3 -# else - movdqa 32(%edx), %xmm3 -# endif - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - lea 16(%ecx), %ecx - test %eax, %eax - jnz L(match_case1) - -# ifndef USE_AS_RAWMEMCHR - pcmpeqb 48(%edi), %xmm1 -# else - pcmpeqb 48(%edx), %xmm1 -# endif - pmovmskb %xmm1, %eax - lea 16(%ecx), %ecx - - .p2align 4 -L(match_case1): -# ifndef USE_AS_RAWMEMCHR - add %ecx, %edi -# else -L(match_case1_prolog1): - add 
%ecx, %edx -L(match_case1_prolog): -# endif - test %al, %al - jz L(match_case1_high) - mov %al, %cl - and $15, %cl - jz L(match_case1_8) - test $0x01, %al - jnz L(ExitCase1_1) - test $0x02, %al - jnz L(ExitCase1_2) - test $0x04, %al - jnz L(ExitCase1_3) -# ifndef USE_AS_RAWMEMCHR - lea 3(%edi), %eax - RETURN -# else - lea 3(%edx), %eax - ret -# endif - - .p2align 4 -L(match_case1_8): - test $0x10, %al - jnz L(ExitCase1_5) - test $0x20, %al - jnz L(ExitCase1_6) - test $0x40, %al - jnz L(ExitCase1_7) -# ifndef USE_AS_RAWMEMCHR - lea 7(%edi), %eax - RETURN -# else - lea 7(%edx), %eax - ret -# endif - - .p2align 4 -L(match_case1_high): - mov %ah, %ch - and $15, %ch - jz L(match_case1_high_8) - test $0x01, %ah - jnz L(ExitCase1_9) - test $0x02, %ah - jnz L(ExitCase1_10) - test $0x04, %ah - jnz L(ExitCase1_11) -# ifndef USE_AS_RAWMEMCHR - lea 11(%edi), %eax - RETURN -# else - lea 11(%edx), %eax - ret -# endif - - .p2align 4 -L(match_case1_high_8): - test $0x10, %ah - jnz L(ExitCase1_13) - test $0x20, %ah - jnz L(ExitCase1_14) - test $0x40, %ah - jnz L(ExitCase1_15) -# ifndef USE_AS_RAWMEMCHR - lea 15(%edi), %eax - RETURN -# else - lea 15(%edx), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(exit_loop): - add $64, %edx - - movdqa (%edi), %xmm0 - pcmpeqb %xmm1, %xmm0 - xor %ecx, %ecx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(match_case2) - cmp $16, %edx - jbe L(return_null) - - movdqa 16(%edi), %xmm2 - pcmpeqb %xmm1, %xmm2 - lea 16(%ecx), %ecx - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(match_case2) - cmp $32, %edx - jbe L(return_null) - - movdqa 32(%edi), %xmm3 - pcmpeqb %xmm1, %xmm3 - lea 16(%ecx), %ecx - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(match_case2) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb 48(%edi), %xmm1 - lea 16(%ecx), %ecx - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(match_case2) - - xor %eax, %eax - RETURN -# endif - - .p2align 4 -L(ExitCase1_1): -# ifndef USE_AS_RAWMEMCHR - mov %edi, %eax - RETURN -# else - 
mov %edx, %eax - ret -# endif - - .p2align 4 -L(ExitCase1_2): -# ifndef USE_AS_RAWMEMCHR - lea 1(%edi), %eax - RETURN -# else - lea 1(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_3): -# ifndef USE_AS_RAWMEMCHR - lea 2(%edi), %eax - RETURN -# else - lea 2(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_5): -# ifndef USE_AS_RAWMEMCHR - lea 4(%edi), %eax - RETURN -# else - lea 4(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_6): -# ifndef USE_AS_RAWMEMCHR - lea 5(%edi), %eax - RETURN -# else - lea 5(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_7): -# ifndef USE_AS_RAWMEMCHR - lea 6(%edi), %eax - RETURN -# else - lea 6(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_9): -# ifndef USE_AS_RAWMEMCHR - lea 8(%edi), %eax - RETURN -# else - lea 8(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_10): -# ifndef USE_AS_RAWMEMCHR - lea 9(%edi), %eax - RETURN -# else - lea 9(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_11): -# ifndef USE_AS_RAWMEMCHR - lea 10(%edi), %eax - RETURN -# else - lea 10(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_13): -# ifndef USE_AS_RAWMEMCHR - lea 12(%edi), %eax - RETURN -# else - lea 12(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_14): -# ifndef USE_AS_RAWMEMCHR - lea 13(%edi), %eax - RETURN -# else - lea 13(%edx), %eax - ret -# endif - - .p2align 4 -L(ExitCase1_15): -# ifndef USE_AS_RAWMEMCHR - lea 14(%edi), %eax - RETURN -# else - lea 14(%edx), %eax - ret -# endif - -# ifndef USE_AS_RAWMEMCHR - .p2align 4 -L(match_case2): - sub %ecx, %edx -L(match_case2_prolog1): - add %ecx, %edi -L(match_case2_prolog): - test %al, %al - jz L(match_case2_high) - mov %al, %cl - and $15, %cl - jz L(match_case2_8) - test $0x01, %al - jnz L(ExitCase2_1) - test $0x02, %al - jnz L(ExitCase2_2) - test $0x04, %al - jnz L(ExitCase2_3) - sub $4, %edx - jb L(return_null) - lea 3(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_8): - test $0x10, %al - jnz L(ExitCase2_5) - test $0x20, %al - jnz 
L(ExitCase2_6) - test $0x40, %al - jnz L(ExitCase2_7) - sub $8, %edx - jb L(return_null) - lea 7(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_high): - mov %ah, %ch - and $15, %ch - jz L(match_case2_high_8) - test $0x01, %ah - jnz L(ExitCase2_9) - test $0x02, %ah - jnz L(ExitCase2_10) - test $0x04, %ah - jnz L(ExitCase2_11) - sub $12, %edx - jb L(return_null) - lea 11(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_high_8): - test $0x10, %ah - jnz L(ExitCase2_13) - test $0x20, %ah - jnz L(ExitCase2_14) - test $0x40, %ah - jnz L(ExitCase2_15) - sub $16, %edx - jb L(return_null) - lea 15(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_1): - mov %edi, %eax - RETURN - - .p2align 4 -L(ExitCase2_2): - sub $2, %edx - jb L(return_null) - lea 1(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_3): - sub $3, %edx - jb L(return_null) - lea 2(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_5): - sub $5, %edx - jb L(return_null) - lea 4(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_6): - sub $6, %edx - jb L(return_null) - lea 5(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_7): - sub $7, %edx - jb L(return_null) - lea 6(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_9): - sub $9, %edx - jb L(return_null) - lea 8(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_10): - sub $10, %edx - jb L(return_null) - lea 9(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_11): - sub $11, %edx - jb L(return_null) - lea 10(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_13): - sub $13, %edx - jb L(return_null) - lea 12(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_14): - sub $14, %edx - jb L(return_null) - lea 13(%edi), %eax - RETURN - - .p2align 4 -L(ExitCase2_15): - sub $15, %edx - jb L(return_null) - lea 14(%edi), %eax - RETURN -# endif - - .p2align 4 -L(return_null): - xor %eax, %eax -# ifndef USE_AS_RAWMEMCHR - RETURN -# else - ret -# endif - -END (MEMCHR) -#endif diff --git a/sysdeps/i386/i686/multiarch/memchr.S b/sysdeps/i386/i686/multiarch/memchr.S deleted file mode 
100644 index bd0dace290..0000000000 --- a/sysdeps/i386/i686/multiarch/memchr.S +++ /dev/null @@ -1,65 +0,0 @@ -/* Multiple versions of memchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__memchr) - .type __memchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - HAS_CPU_FEATURE (SSE2) - jz 2f - HAS_ARCH_FEATURE (Slow_BSF) - jz 3f - - LOAD_FUNC_GOT_EAX ( __memchr_sse2) - ret - -2: LOAD_FUNC_GOT_EAX (__memchr_ia32) - ret - -3: LOAD_FUNC_GOT_EAX (__memchr_sse2_bsf) - ret -END(__memchr) - -weak_alias(__memchr, memchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __memchr_ia32, @function; \ - .globl __memchr_ia32; \ - .p2align 4; \ - __memchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memchr_ia32, .-__memchr_ia32 - -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memchr; __GI_memchr = __memchr_ia32 - -#endif -#include "../../memchr.S" diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S deleted file mode 100644 index 2aa13048b2..0000000000 --- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S +++ /dev/null @@ -1,1225 +0,0 @@ -/* memcmp with SSE4.2, wmemcmp with SSE4.2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# ifndef MEMCMP -# define MEMCMP __memcmp_sse4_2 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define BLK1 PARMS -# define BLK2 BLK1 + 4 -# define LEN BLK2 + 4 -# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx) - - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ -/* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ -/* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ -/* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ebx,INDEX,SCALE), %ebx; \ -/* We loaded the jump table and adjusted EDX/ESI. Go. */ \ - jmp *%ebx -# else -# define JMPTBL(I, B) I - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - - -/* Warning! - wmemcmp has to use SIGNED comparison for elements. - memcmp has to use UNSIGNED comparison for elemnts. -*/ - - .section .text.sse4.2,"ax",@progbits -ENTRY (MEMCMP) - movl BLK1(%esp), %eax - movl BLK2(%esp), %edx - movl LEN(%esp), %ecx - -# ifdef USE_AS_WMEMCMP - shl $2, %ecx - test %ecx, %ecx - jz L(return0) -# else - cmp $1, %ecx - jbe L(less1bytes) -# endif - - pxor %xmm0, %xmm0 - cmp $64, %ecx - ja L(64bytesormore) - cmp $8, %ecx - -# ifndef USE_AS_WMEMCMP - PUSH (%ebx) - jb L(less8bytes) -# else - jb L(less8bytes) - PUSH (%ebx) -# endif - - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4) - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less8bytes): - mov (%eax), %bl - cmpb (%edx), %bl - jne L(nonzero) - - mov 1(%eax), %bl - cmpb 1(%edx), %bl - jne L(nonzero) - - cmp $2, %ecx - jz L(0bytes) - - mov 2(%eax), %bl - cmpb 2(%edx), %bl - jne L(nonzero) - - cmp $3, %ecx - jz L(0bytes) - - mov 3(%eax), %bl - cmpb 3(%edx), %bl - jne L(nonzero) - - cmp $4, %ecx - jz L(0bytes) - - mov 4(%eax), %bl - cmpb 4(%edx), %bl - jne L(nonzero) - - cmp $5, %ecx - jz L(0bytes) - - mov 5(%eax), %bl - cmpb 5(%edx), %bl - jne L(nonzero) - - cmp $6, %ecx - jz L(0bytes) - - mov 6(%eax), %bl - cmpb 6(%edx), %bl - je L(0bytes) - -L(nonzero): 
- POP (%ebx) - mov $1, %eax - ja L(above) - neg %eax -L(above): - ret - CFI_PUSH (%ebx) -# endif - - .p2align 4 -L(0bytes): - POP (%ebx) - xor %eax, %eax - ret - -# ifdef USE_AS_WMEMCMP - -/* for wmemcmp, case N == 1 */ - - .p2align 4 -L(less8bytes): - mov (%eax), %ecx - cmp (%edx), %ecx - je L(return0) - mov $1, %eax - jg L(find_diff_bigger) - neg %eax - ret - - .p2align 4 -L(find_diff_bigger): - ret - - .p2align 4 -L(return0): - xor %eax, %eax - ret -# endif - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less1bytes): - jb L(0bytesend) - movzbl (%eax), %eax - movzbl (%edx), %edx - sub %edx, %eax - ret - - .p2align 4 -L(0bytesend): - xor %eax, %eax - ret -# endif - .p2align 4 -L(64bytesormore): - PUSH (%ebx) - mov %ecx, %ebx - mov $64, %ecx - sub $64, %ebx -L(64bytesormore_loop): - movdqu (%eax), %xmm1 - movdqu (%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_16diff) - - movdqu 16(%eax), %xmm1 - movdqu 16(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_32diff) - - movdqu 32(%eax), %xmm1 - movdqu 32(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_48diff) - - movdqu 48(%eax), %xmm1 - movdqu 48(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(find_64diff) - add %ecx, %eax - add %ecx, %edx - sub %ecx, %ebx - jae L(64bytesormore_loop) - add %ebx, %ecx - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4) - -# ifdef USE_AS_WMEMCMP - -/* Label needs only for table_64bytes filling */ -L(unreal_case): -/* no code here */ - -# endif - .p2align 4 -L(find_16diff): - sub $16, %ecx -L(find_32diff): - sub $16, %ecx -L(find_48diff): - sub $16, %ecx -L(find_64diff): - add %ecx, %edx - add %ecx, %eax - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(16bytes): - mov -16(%eax), %ecx - mov -16(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - mov -12(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - mov -8(%edx), %ebx - cmp %ebx, 
%ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - mov -4(%edx), %ebx - cmp %ebx, %ecx - mov $0, %eax - jne L(find_diff) - RETURN -# else - .p2align 4 -L(16bytes): - mov -16(%eax), %ecx - cmp -16(%edx), %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - cmp -12(%edx), %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - cmp -8(%edx), %ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - cmp -4(%edx), %ecx - mov $0, %eax - jne L(find_diff) - RETURN -# endif - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(49bytes): - movdqu -49(%eax), %xmm1 - movdqu -49(%edx), %xmm2 - mov $-49, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(33bytes): - movdqu -33(%eax), %xmm1 - movdqu -33(%edx), %xmm2 - mov $-33, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(17bytes): - mov -17(%eax), %ecx - mov -17(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(13bytes): - mov -13(%eax), %ecx - mov -13(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(9bytes): - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(5bytes): - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(50bytes): - mov $-50, %ebx - movdqu -50(%eax), %xmm1 - movdqu -50(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(34bytes): - mov $-34, %ebx - movdqu -34(%eax), %xmm1 - movdqu -34(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(18bytes): - mov -18(%eax), %ecx - mov -18(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(14bytes): - mov -14(%eax), %ecx - mov -14(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(10bytes): - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(6bytes): - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(2bytes): - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - 
cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(51bytes): - mov $-51, %ebx - movdqu -51(%eax), %xmm1 - movdqu -51(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(35bytes): - mov $-35, %ebx - movdqu -35(%eax), %xmm1 - movdqu -35(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(19bytes): - movl -19(%eax), %ecx - movl -19(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(15bytes): - movl -15(%eax), %ecx - movl -15(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(11bytes): - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(7bytes): - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(3bytes): - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) -L(1bytes): - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# endif - .p2align 4 -L(52bytes): - movdqu -52(%eax), %xmm1 - movdqu -52(%edx), %xmm2 - mov $-52, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(36bytes): - movdqu -36(%eax), %xmm1 - movdqu -36(%edx), %xmm2 - mov $-36, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(20bytes): - movdqu -20(%eax), %xmm1 - movdqu -20(%edx), %xmm2 - mov $-20, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(53bytes): - movdqu -53(%eax), %xmm1 - movdqu -53(%edx), %xmm2 - mov $-53, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(37bytes): - mov $-37, %ebx - movdqu -37(%eax), %xmm1 - movdqu -37(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(21bytes): - mov $-21, %ebx - movdqu -21(%eax), %xmm1 - movdqu -21(%edx), %xmm2 - 
pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(54bytes): - movdqu -54(%eax), %xmm1 - movdqu -54(%edx), %xmm2 - mov $-54, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(38bytes): - mov $-38, %ebx - movdqu -38(%eax), %xmm1 - movdqu -38(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(22bytes): - mov $-22, %ebx - movdqu -22(%eax), %xmm1 - movdqu -22(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(55bytes): - movdqu -55(%eax), %xmm1 - movdqu -55(%edx), %xmm2 - mov $-55, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(39bytes): - mov $-39, %ebx - movdqu -39(%eax), %xmm1 - movdqu -39(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(23bytes): - mov $-23, %ebx - movdqu -23(%eax), %xmm1 - movdqu -23(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# endif - .p2align 4 -L(56bytes): - movdqu -56(%eax), %xmm1 - movdqu -56(%edx), %xmm2 - mov $-56, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(40bytes): - mov $-40, %ebx - movdqu -40(%eax), %xmm1 - movdqu -40(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(24bytes): - mov $-24, %ebx - movdqu -24(%eax), %xmm1 - movdqu -24(%edx), %xmm2 - pxor %xmm1, %xmm2 
- ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -8(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -8(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -8(%edx), %ecx -# endif - jne L(find_diff) - - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(57bytes): - movdqu -57(%eax), %xmm1 - movdqu -57(%edx), %xmm2 - mov $-57, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(41bytes): - mov $-41, %ebx - movdqu -41(%eax), %xmm1 - movdqu -41(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(25bytes): - mov $-25, %ebx - movdqu -25(%eax), %xmm1 - movdqu -25(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(58bytes): - movdqu -58(%eax), %xmm1 - movdqu -58(%edx), %xmm2 - mov $-58, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(42bytes): - mov $-42, %ebx - movdqu -42(%eax), %xmm1 - movdqu -42(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(26bytes): - mov $-26, %ebx - movdqu -26(%eax), %xmm1 - movdqu -26(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(59bytes): - movdqu -59(%eax), %xmm1 - movdqu -59(%edx), %xmm2 - mov $-59, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(43bytes): - mov $-43, %ebx - movdqu 
-43(%eax), %xmm1 - movdqu -43(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(27bytes): - mov $-27, %ebx - movdqu -27(%eax), %xmm1 - movdqu -27(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# endif - .p2align 4 -L(60bytes): - movdqu -60(%eax), %xmm1 - movdqu -60(%edx), %xmm2 - mov $-60, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(44bytes): - mov $-44, %ebx - movdqu -44(%eax), %xmm1 - movdqu -44(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(28bytes): - mov $-28, %ebx - movdqu -28(%eax), %xmm1 - movdqu -28(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -12(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -12(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -12(%edx), %ecx -# endif - jne L(find_diff) - - mov -8(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -8(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -8(%edx), %ecx -# endif - jne L(find_diff) - - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(61bytes): - movdqu -61(%eax), %xmm1 - movdqu -61(%edx), %xmm2 - mov $-61, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(45bytes): - mov $-45, %ebx - movdqu -45(%eax), %xmm1 - movdqu -45(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(29bytes): - mov $-29, %ebx - movdqu -29(%eax), %xmm1 - movdqu -29(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -13(%eax), 
%ecx - mov -13(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(62bytes): - movdqu -62(%eax), %xmm1 - movdqu -62(%edx), %xmm2 - mov $-62, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(46bytes): - mov $-46, %ebx - movdqu -46(%eax), %xmm1 - movdqu -46(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(30bytes): - mov $-30, %ebx - movdqu -30(%eax), %xmm1 - movdqu -30(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - mov -14(%eax), %ecx - mov -14(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - RETURN - - .p2align 4 -L(63bytes): - movdqu -63(%eax), %xmm1 - movdqu -63(%edx), %xmm2 - mov $-63, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(47bytes): - mov $-47, %ebx - movdqu -47(%eax), %xmm1 - movdqu -47(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(31bytes): - mov $-31, %ebx - movdqu -31(%eax), %xmm1 - movdqu -31(%edx), %xmm2 - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - movl -15(%eax), %ecx - movl -15(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - RETURN -# 
endif - - .p2align 4 -L(64bytes): - movdqu -64(%eax), %xmm1 - movdqu -64(%edx), %xmm2 - mov $-64, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(48bytes): - movdqu -48(%eax), %xmm1 - movdqu -48(%edx), %xmm2 - mov $-48, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) -L(32bytes): - movdqu -32(%eax), %xmm1 - movdqu -32(%edx), %xmm2 - mov $-32, %ebx - pxor %xmm1, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - - mov -16(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -16(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -16(%edx), %ecx -# endif - jne L(find_diff) - - mov -12(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -12(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -12(%edx), %ecx -# endif - jne L(find_diff) - - mov -8(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -8(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -8(%edx), %ecx -# endif - jne L(find_diff) - - mov -4(%eax), %ecx -# ifndef USE_AS_WMEMCMP - mov -4(%edx), %ebx - cmp %ebx, %ecx -# else - cmp -4(%edx), %ecx -# endif - mov $0, %eax - jne L(find_diff) - RETURN - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less16bytes): - add %ebx, %eax - add %ebx, %edx - - mov (%eax), %ecx - mov (%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov 4(%eax), %ecx - mov 4(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov 8(%eax), %ecx - mov 8(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - - mov 12(%eax), %ecx - mov 12(%edx), %ebx - cmp %ebx, %ecx - mov $0, %eax - jne L(find_diff) - RETURN -# else - .p2align 4 -L(less16bytes): - add %ebx, %eax - add %ebx, %edx - - mov (%eax), %ecx - cmp (%edx), %ecx - jne L(find_diff) - - mov 4(%eax), %ecx - cmp 4(%edx), %ecx - jne L(find_diff) - - mov 8(%eax), %ecx - cmp 8(%edx), %ecx - jne L(find_diff) - - mov 12(%eax), %ecx - cmp 12(%edx), %ecx - - mov $0, %eax - jne L(find_diff) - RETURN -# endif - - .p2align 4 -L(find_diff): -# ifndef USE_AS_WMEMCMP - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - shr $16,%ecx - shr $16,%ebx - cmp %bl, %cl - jne 
L(end) - cmp %bx, %cx -L(end): - POP (%ebx) - mov $1, %eax - ja L(bigger) - neg %eax -L(bigger): - ret -# else - POP (%ebx) - mov $1, %eax - jg L(bigger) - neg %eax - ret - - .p2align 4 -L(bigger): - ret -# endif -END (MEMCMP) - - .section .rodata.sse4.2,"a",@progbits - .p2align 2 - .type L(table_64bytes), @object -# ifndef USE_AS_WMEMCMP -L(table_64bytes): - .int JMPTBL (L(0bytes), L(table_64bytes)) - .int JMPTBL (L(1bytes), L(table_64bytes)) - .int JMPTBL (L(2bytes), L(table_64bytes)) - .int JMPTBL (L(3bytes), L(table_64bytes)) - .int JMPTBL (L(4bytes), L(table_64bytes)) - .int JMPTBL (L(5bytes), L(table_64bytes)) - .int JMPTBL (L(6bytes), L(table_64bytes)) - .int JMPTBL (L(7bytes), L(table_64bytes)) - .int JMPTBL (L(8bytes), L(table_64bytes)) - .int JMPTBL (L(9bytes), L(table_64bytes)) - .int JMPTBL (L(10bytes), L(table_64bytes)) - .int JMPTBL (L(11bytes), L(table_64bytes)) - .int JMPTBL (L(12bytes), L(table_64bytes)) - .int JMPTBL (L(13bytes), L(table_64bytes)) - .int JMPTBL (L(14bytes), L(table_64bytes)) - .int JMPTBL (L(15bytes), L(table_64bytes)) - .int JMPTBL (L(16bytes), L(table_64bytes)) - .int JMPTBL (L(17bytes), L(table_64bytes)) - .int JMPTBL (L(18bytes), L(table_64bytes)) - .int JMPTBL (L(19bytes), L(table_64bytes)) - .int JMPTBL (L(20bytes), L(table_64bytes)) - .int JMPTBL (L(21bytes), L(table_64bytes)) - .int JMPTBL (L(22bytes), L(table_64bytes)) - .int JMPTBL (L(23bytes), L(table_64bytes)) - .int JMPTBL (L(24bytes), L(table_64bytes)) - .int JMPTBL (L(25bytes), L(table_64bytes)) - .int JMPTBL (L(26bytes), L(table_64bytes)) - .int JMPTBL (L(27bytes), L(table_64bytes)) - .int JMPTBL (L(28bytes), L(table_64bytes)) - .int JMPTBL (L(29bytes), L(table_64bytes)) - .int JMPTBL (L(30bytes), L(table_64bytes)) - .int JMPTBL (L(31bytes), L(table_64bytes)) - .int JMPTBL (L(32bytes), L(table_64bytes)) - .int JMPTBL (L(33bytes), L(table_64bytes)) - .int JMPTBL (L(34bytes), L(table_64bytes)) - .int JMPTBL (L(35bytes), L(table_64bytes)) - .int JMPTBL (L(36bytes), 
L(table_64bytes)) - .int JMPTBL (L(37bytes), L(table_64bytes)) - .int JMPTBL (L(38bytes), L(table_64bytes)) - .int JMPTBL (L(39bytes), L(table_64bytes)) - .int JMPTBL (L(40bytes), L(table_64bytes)) - .int JMPTBL (L(41bytes), L(table_64bytes)) - .int JMPTBL (L(42bytes), L(table_64bytes)) - .int JMPTBL (L(43bytes), L(table_64bytes)) - .int JMPTBL (L(44bytes), L(table_64bytes)) - .int JMPTBL (L(45bytes), L(table_64bytes)) - .int JMPTBL (L(46bytes), L(table_64bytes)) - .int JMPTBL (L(47bytes), L(table_64bytes)) - .int JMPTBL (L(48bytes), L(table_64bytes)) - .int JMPTBL (L(49bytes), L(table_64bytes)) - .int JMPTBL (L(50bytes), L(table_64bytes)) - .int JMPTBL (L(51bytes), L(table_64bytes)) - .int JMPTBL (L(52bytes), L(table_64bytes)) - .int JMPTBL (L(53bytes), L(table_64bytes)) - .int JMPTBL (L(54bytes), L(table_64bytes)) - .int JMPTBL (L(55bytes), L(table_64bytes)) - .int JMPTBL (L(56bytes), L(table_64bytes)) - .int JMPTBL (L(57bytes), L(table_64bytes)) - .int JMPTBL (L(58bytes), L(table_64bytes)) - .int JMPTBL (L(59bytes), L(table_64bytes)) - .int JMPTBL (L(60bytes), L(table_64bytes)) - .int JMPTBL (L(61bytes), L(table_64bytes)) - .int JMPTBL (L(62bytes), L(table_64bytes)) - .int JMPTBL (L(63bytes), L(table_64bytes)) - .int JMPTBL (L(64bytes), L(table_64bytes)) -# else -L(table_64bytes): - .int JMPTBL (L(0bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(4bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(8bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(12bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL 
(L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(16bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(20bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(24bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(28bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(32bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(36bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(40bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(44bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(48bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(52bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), 
L(table_64bytes)) - .int JMPTBL (L(56bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(60bytes), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(unreal_case), L(table_64bytes)) - .int JMPTBL (L(64bytes), L(table_64bytes)) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S deleted file mode 100644 index 5ebf5a4d73..0000000000 --- a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S +++ /dev/null @@ -1,2157 +0,0 @@ -/* memcmp with SSSE3, wmemcmp with SSSE3 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) - -# include <sysdep.h> - -# ifndef MEMCMP -# define MEMCMP __memcmp_ssse3 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define BLK1 PARMS -# define BLK2 BLK1+4 -# define LEN BLK2+4 -# define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret -# define RETURN RETURN_END; cfi_restore_state; cfi_remember_state - -/* Warning! - wmemcmp has to use SIGNED comparison for elements. - memcmp has to use UNSIGNED comparison for elemnts. -*/ - - atom_text_section -ENTRY (MEMCMP) - movl LEN(%esp), %ecx - -# ifdef USE_AS_WMEMCMP - shl $2, %ecx - test %ecx, %ecx - jz L(zero) -# endif - - movl BLK1(%esp), %eax - cmp $48, %ecx - movl BLK2(%esp), %edx - jae L(48bytesormore) - -# ifndef USE_AS_WMEMCMP - cmp $1, %ecx - jbe L(less1bytes) -# endif - - PUSH (%ebx) - add %ecx, %edx - add %ecx, %eax - jmp L(less48bytes) - - CFI_POP (%ebx) - -# ifndef USE_AS_WMEMCMP - .p2align 4 -L(less1bytes): - jb L(zero) - movb (%eax), %cl - cmp (%edx), %cl - je L(zero) - mov $1, %eax - ja L(1bytesend) - neg %eax -L(1bytesend): - ret -# endif - - .p2align 4 -L(zero): - xor %eax, %eax - ret - - .p2align 4 -L(48bytesormore): - PUSH (%ebx) - PUSH (%esi) - PUSH (%edi) - cfi_remember_state - movdqu (%eax), %xmm3 - movdqu (%edx), %xmm0 - movl %eax, %edi - movl %edx, %esi - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - lea 16(%edi), %edi - - sub $0xffff, %edx - lea 16(%esi), %esi - jnz L(less16bytes) - mov %edi, %edx - and $0xf, %edx - xor %edx, %edi - sub %edx, %esi - add %edx, %ecx - mov %esi, %edx - and $0xf, %edx - jz L(shr_0) - xor %edx, %esi - -# ifndef USE_AS_WMEMCMP - cmp $8, %edx - jae L(next_unaligned_table) - cmp $0, %edx - je L(shr_0) - cmp $1, %edx - je L(shr_1) - cmp $2, %edx - je L(shr_2) - cmp $3, %edx - je L(shr_3) - cmp $4, %edx - je 
L(shr_4) - cmp $5, %edx - je L(shr_5) - cmp $6, %edx - je L(shr_6) - jmp L(shr_7) - - .p2align 2 -L(next_unaligned_table): - cmp $8, %edx - je L(shr_8) - cmp $9, %edx - je L(shr_9) - cmp $10, %edx - je L(shr_10) - cmp $11, %edx - je L(shr_11) - cmp $12, %edx - je L(shr_12) - cmp $13, %edx - je L(shr_13) - cmp $14, %edx - je L(shr_14) - jmp L(shr_15) -# else - cmp $0, %edx - je L(shr_0) - cmp $4, %edx - je L(shr_4) - cmp $8, %edx - je L(shr_8) - jmp L(shr_12) -# endif - - .p2align 4 -L(shr_0): - cmp $80, %ecx - jae L(shr_0_gobble) - lea -48(%ecx), %ecx - xor %eax, %eax - movaps (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - movaps 16(%esi), %xmm2 - pcmpeqb 16(%edi), %xmm2 - pand %xmm1, %xmm2 - pmovmskb %xmm2, %edx - add $32, %edi - add $32, %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea (%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_0_gobble): - lea -48(%ecx), %ecx - movdqa (%esi), %xmm0 - xor %eax, %eax - pcmpeqb (%edi), %xmm0 - sub $32, %ecx - movdqa 16(%esi), %xmm2 - pcmpeqb 16(%edi), %xmm2 -L(shr_0_gobble_loop): - pand %xmm0, %xmm2 - sub $32, %ecx - pmovmskb %xmm2, %edx - movdqa %xmm0, %xmm1 - movdqa 32(%esi), %xmm0 - movdqa 48(%esi), %xmm2 - sbb $0xffff, %edx - pcmpeqb 32(%edi), %xmm0 - pcmpeqb 48(%edi), %xmm2 - lea 32(%edi), %edi - lea 32(%esi), %esi - jz L(shr_0_gobble_loop) - - pand %xmm0, %xmm2 - cmp $0, %ecx - jge L(shr_0_gobble_loop_next) - inc %edx - add $32, %ecx -L(shr_0_gobble_loop_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm2, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea (%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_1): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_1_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, 
%xmm2 - palignr $1,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $1,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 1(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_1_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $1,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $1,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_1_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $1,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $1,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_1_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_1_gobble_next) - inc %edx - add $32, %ecx -L(shr_1_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 1(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_2): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_2_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $2,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $2,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 2(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_2_gobble): - sub $32, %ecx - movdqa 16(%esi), 
%xmm0 - palignr $2,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $2,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_2_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $2,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $2,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_2_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_2_gobble_next) - inc %edx - add $32, %ecx -L(shr_2_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 2(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_3): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_3_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $3,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $3,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 3(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_3_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $3,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $3,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_3_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $3,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $3,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - 
jz L(shr_3_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_3_gobble_next) - inc %edx - add $32, %ecx -L(shr_3_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 3(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_4): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_4_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $4,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $4,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 4(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_4_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $4,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $4,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_4_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $4,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $4,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_4_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_4_gobble_next) - inc %edx - add $32, %ecx -L(shr_4_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 4(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state 
- .p2align 4 -L(shr_5): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_5_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $5,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $5,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 5(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_5_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $5,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $5,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_5_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $5,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $5,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_5_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_5_gobble_next) - inc %edx - add $32, %ecx -L(shr_5_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 5(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_6): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_6_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $6,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $6,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 6(%ecx, %esi,1), %edx - POP (%edi) - POP 
(%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_6_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $6,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $6,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_6_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $6,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $6,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_6_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_6_gobble_next) - inc %edx - add $32, %ecx -L(shr_6_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 6(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_7): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_7_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $7,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $7,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 7(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_7_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $7,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $7,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_7_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $7,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 
48(%esi), %xmm0 - palignr $7,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_7_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_7_gobble_next) - inc %edx - add $32, %ecx -L(shr_7_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 7(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_8): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_8_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $8,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $8,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 8(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_8_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $8,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $8,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_8_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $8,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $8,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_8_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_8_gobble_next) - inc %edx - add $32, %ecx -L(shr_8_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax 
- lea 8(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_9): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_9_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $9,(%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $9,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 9(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_9_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $9,(%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $9,16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_9_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $9,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $9,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_9_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_9_gobble_next) - inc %edx - add $32, %ecx -L(shr_9_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 9(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_10): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_10_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $10, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $10,%xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - 
lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 10(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_10_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $10, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $10, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_10_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $10,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $10,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_10_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_10_gobble_next) - inc %edx - add $32, %ecx -L(shr_10_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 10(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_11): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_11_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $11, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $11, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 11(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_11_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $11, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $11, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_11_gobble_loop): 
- pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $11,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $11,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_11_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_11_gobble_next) - inc %edx - add $32, %ecx -L(shr_11_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 11(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_12): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_12_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $12, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $12, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 12(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_12_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $12, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $12, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_12_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $12,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $12,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_12_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_12_gobble_next) - inc %edx - add $32, %ecx 
-L(shr_12_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 12(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - -# ifndef USE_AS_WMEMCMP - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_13): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_13_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $13, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $13, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 13(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_13_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $13, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $13, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_13_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $13,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $13,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_13_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_13_gobble_next) - inc %edx - add $32, %ecx -L(shr_13_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 13(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_14): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_14_gobble) 
- - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $14, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $14, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 14(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_14_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $14, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $14, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_14_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $14,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $14,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_14_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_14_gobble_next) - inc %edx - add $32, %ecx -L(shr_14_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 14(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(shr_15): - cmp $80, %ecx - lea -48(%ecx), %ecx - mov %edx, %eax - jae L(shr_15_gobble) - - movdqa 16(%esi), %xmm1 - movdqa %xmm1, %xmm2 - palignr $15, (%esi), %xmm1 - pcmpeqb (%edi), %xmm1 - - movdqa 32(%esi), %xmm3 - palignr $15, %xmm2, %xmm3 - pcmpeqb 16(%edi), %xmm3 - - pand %xmm1, %xmm3 - pmovmskb %xmm3, %edx - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - lea (%ecx, %edi,1), %eax - lea 15(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) - - cfi_restore_state - cfi_remember_state - 
.p2align 4 -L(shr_15_gobble): - sub $32, %ecx - movdqa 16(%esi), %xmm0 - palignr $15, (%esi), %xmm0 - pcmpeqb (%edi), %xmm0 - - movdqa 32(%esi), %xmm3 - palignr $15, 16(%esi), %xmm3 - pcmpeqb 16(%edi), %xmm3 - -L(shr_15_gobble_loop): - pand %xmm0, %xmm3 - sub $32, %ecx - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - - movdqa 64(%esi), %xmm3 - palignr $15,48(%esi), %xmm3 - sbb $0xffff, %edx - movdqa 48(%esi), %xmm0 - palignr $15,32(%esi), %xmm0 - pcmpeqb 32(%edi), %xmm0 - lea 32(%esi), %esi - pcmpeqb 48(%edi), %xmm3 - - lea 32(%edi), %edi - jz L(shr_15_gobble_loop) - pand %xmm0, %xmm3 - - cmp $0, %ecx - jge L(shr_15_gobble_next) - inc %edx - add $32, %ecx -L(shr_15_gobble_next): - test %edx, %edx - jnz L(exit) - - pmovmskb %xmm3, %edx - movdqa %xmm0, %xmm1 - lea 32(%edi), %edi - lea 32(%esi), %esi - sub $0xffff, %edx - jnz L(exit) - - lea (%ecx, %edi,1), %eax - lea 15(%ecx, %esi,1), %edx - POP (%edi) - POP (%esi) - jmp L(less48bytes) -# endif - - cfi_restore_state - cfi_remember_state - .p2align 4 -L(exit): - pmovmskb %xmm1, %ebx - sub $0xffff, %ebx - jz L(first16bytes) - lea -16(%esi), %esi - lea -16(%edi), %edi - mov %ebx, %edx - -L(first16bytes): - add %eax, %esi -L(less16bytes): - -# ifndef USE_AS_WMEMCMP - test %dl, %dl - jz L(next_24_bytes) - - test $0x01, %dl - jnz L(Byte16) - - test $0x02, %dl - jnz L(Byte17) - - test $0x04, %dl - jnz L(Byte18) - - test $0x08, %dl - jnz L(Byte19) - - test $0x10, %dl - jnz L(Byte20) - - test $0x20, %dl - jnz L(Byte21) - - test $0x40, %dl - jnz L(Byte22) -L(Byte23): - movzbl -9(%edi), %eax - movzbl -9(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte16): - movzbl -16(%edi), %eax - movzbl -16(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte17): - movzbl -15(%edi), %eax - movzbl -15(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte18): - movzbl -14(%edi), %eax - movzbl -14(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte19): - movzbl -13(%edi), %eax - movzbl -13(%esi), %edx - sub 
%edx, %eax - RETURN - - .p2align 4 -L(Byte20): - movzbl -12(%edi), %eax - movzbl -12(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte21): - movzbl -11(%edi), %eax - movzbl -11(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(Byte22): - movzbl -10(%edi), %eax - movzbl -10(%esi), %edx - sub %edx, %eax - RETURN - - .p2align 4 -L(next_24_bytes): - lea 8(%edi), %edi - lea 8(%esi), %esi - test $0x01, %dh - jnz L(Byte16) - - test $0x02, %dh - jnz L(Byte17) - - test $0x04, %dh - jnz L(Byte18) - - test $0x08, %dh - jnz L(Byte19) - - test $0x10, %dh - jnz L(Byte20) - - test $0x20, %dh - jnz L(Byte21) - - test $0x40, %dh - jnz L(Byte22) - - .p2align 4 -L(Byte31): - movzbl -9(%edi), %eax - movzbl -9(%esi), %edx - sub %edx, %eax - RETURN_END -# else - -/* special for wmemcmp */ - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words) - and $15, %dl - jz L(second_double_word) - mov -16(%edi), %eax - cmp -16(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word): - mov -12(%edi), %eax - cmp -12(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words): - and $15, %dh - jz L(fourth_double_word) - mov -8(%edi), %eax - cmp -8(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word): - mov -4(%edi), %eax - cmp -4(%esi), %eax - jne L(nequal) - RETURN - - .p2align 4 -L(nequal): - mov $1, %eax - jg L(nequal_bigger) - neg %eax - RETURN - - .p2align 4 -L(nequal_bigger): - RETURN_END -# endif - - CFI_PUSH (%ebx) - - .p2align 4 -L(more8bytes): - cmp $16, %ecx - jae L(more16bytes) - cmp $8, %ecx - je L(8bytes) -# ifndef USE_AS_WMEMCMP - cmp $9, %ecx - je L(9bytes) - cmp $10, %ecx - je L(10bytes) - cmp $11, %ecx - je L(11bytes) - cmp $12, %ecx - je L(12bytes) - cmp $13, %ecx - je L(13bytes) - cmp $14, %ecx - je L(14bytes) - jmp L(15bytes) -# else - jmp L(12bytes) -# endif - - .p2align 4 -L(more16bytes): - cmp $24, %ecx - jae L(more24bytes) - cmp $16, %ecx - je L(16bytes) -# ifndef USE_AS_WMEMCMP - cmp $17, %ecx 
- je L(17bytes) - cmp $18, %ecx - je L(18bytes) - cmp $19, %ecx - je L(19bytes) - cmp $20, %ecx - je L(20bytes) - cmp $21, %ecx - je L(21bytes) - cmp $22, %ecx - je L(22bytes) - jmp L(23bytes) -# else - jmp L(20bytes) -# endif - - .p2align 4 -L(more24bytes): - cmp $32, %ecx - jae L(more32bytes) - cmp $24, %ecx - je L(24bytes) -# ifndef USE_AS_WMEMCMP - cmp $25, %ecx - je L(25bytes) - cmp $26, %ecx - je L(26bytes) - cmp $27, %ecx - je L(27bytes) - cmp $28, %ecx - je L(28bytes) - cmp $29, %ecx - je L(29bytes) - cmp $30, %ecx - je L(30bytes) - jmp L(31bytes) -# else - jmp L(28bytes) -# endif - - .p2align 4 -L(more32bytes): - cmp $40, %ecx - jae L(more40bytes) - cmp $32, %ecx - je L(32bytes) -# ifndef USE_AS_WMEMCMP - cmp $33, %ecx - je L(33bytes) - cmp $34, %ecx - je L(34bytes) - cmp $35, %ecx - je L(35bytes) - cmp $36, %ecx - je L(36bytes) - cmp $37, %ecx - je L(37bytes) - cmp $38, %ecx - je L(38bytes) - jmp L(39bytes) -# else - jmp L(36bytes) -# endif - - .p2align 4 -L(less48bytes): - cmp $8, %ecx - jae L(more8bytes) -# ifndef USE_AS_WMEMCMP - cmp $2, %ecx - je L(2bytes) - cmp $3, %ecx - je L(3bytes) - cmp $4, %ecx - je L(4bytes) - cmp $5, %ecx - je L(5bytes) - cmp $6, %ecx - je L(6bytes) - jmp L(7bytes) -# else - jmp L(4bytes) -# endif - - .p2align 4 -L(more40bytes): - cmp $40, %ecx - je L(40bytes) -# ifndef USE_AS_WMEMCMP - cmp $41, %ecx - je L(41bytes) - cmp $42, %ecx - je L(42bytes) - cmp $43, %ecx - je L(43bytes) - cmp $44, %ecx - je L(44bytes) - cmp $45, %ecx - je L(45bytes) - cmp $46, %ecx - je L(46bytes) - jmp L(47bytes) - - .p2align 4 -L(44bytes): - mov -44(%eax), %ecx - mov -44(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(40bytes): - mov -40(%eax), %ecx - mov -40(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(36bytes): - mov -36(%eax), %ecx - mov -36(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(32bytes): - mov -32(%eax), %ecx - mov -32(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(28bytes): - mov -28(%eax), %ecx - mov -28(%edx), %ebx - 
cmp %ebx, %ecx - jne L(find_diff) -L(24bytes): - mov -24(%eax), %ecx - mov -24(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(20bytes): - mov -20(%eax), %ecx - mov -20(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(16bytes): - mov -16(%eax), %ecx - mov -16(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - mov -12(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - mov -8(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - mov -4(%edx), %ebx - cmp %ebx, %ecx - mov $0, %eax - jne L(find_diff) - POP (%ebx) - ret - CFI_PUSH (%ebx) -# else - .p2align 4 -L(44bytes): - mov -44(%eax), %ecx - cmp -44(%edx), %ecx - jne L(find_diff) -L(40bytes): - mov -40(%eax), %ecx - cmp -40(%edx), %ecx - jne L(find_diff) -L(36bytes): - mov -36(%eax), %ecx - cmp -36(%edx), %ecx - jne L(find_diff) -L(32bytes): - mov -32(%eax), %ecx - cmp -32(%edx), %ecx - jne L(find_diff) -L(28bytes): - mov -28(%eax), %ecx - cmp -28(%edx), %ecx - jne L(find_diff) -L(24bytes): - mov -24(%eax), %ecx - cmp -24(%edx), %ecx - jne L(find_diff) -L(20bytes): - mov -20(%eax), %ecx - cmp -20(%edx), %ecx - jne L(find_diff) -L(16bytes): - mov -16(%eax), %ecx - cmp -16(%edx), %ecx - jne L(find_diff) -L(12bytes): - mov -12(%eax), %ecx - cmp -12(%edx), %ecx - jne L(find_diff) -L(8bytes): - mov -8(%eax), %ecx - cmp -8(%edx), %ecx - jne L(find_diff) -L(4bytes): - mov -4(%eax), %ecx - xor %eax, %eax - cmp -4(%edx), %ecx - jne L(find_diff) - POP (%ebx) - ret - CFI_PUSH (%ebx) -# endif - -# ifndef USE_AS_WMEMCMP - - .p2align 4 -L(45bytes): - mov -45(%eax), %ecx - mov -45(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(41bytes): - mov -41(%eax), %ecx - mov -41(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(37bytes): - mov -37(%eax), %ecx - mov -37(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(33bytes): - mov -33(%eax), %ecx - mov -33(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(29bytes): - mov -29(%eax), 
%ecx - mov -29(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(25bytes): - mov -25(%eax), %ecx - mov -25(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(21bytes): - mov -21(%eax), %ecx - mov -21(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(17bytes): - mov -17(%eax), %ecx - mov -17(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(13bytes): - mov -13(%eax), %ecx - mov -13(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(9bytes): - mov -9(%eax), %ecx - mov -9(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(5bytes): - mov -5(%eax), %ecx - mov -5(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) - movzbl -1(%eax), %ecx - cmp -1(%edx), %cl - mov $0, %eax - jne L(end) - POP (%ebx) - ret - CFI_PUSH (%ebx) - - .p2align 4 -L(46bytes): - mov -46(%eax), %ecx - mov -46(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(42bytes): - mov -42(%eax), %ecx - mov -42(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(38bytes): - mov -38(%eax), %ecx - mov -38(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(34bytes): - mov -34(%eax), %ecx - mov -34(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(30bytes): - mov -30(%eax), %ecx - mov -30(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(26bytes): - mov -26(%eax), %ecx - mov -26(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(22bytes): - mov -22(%eax), %ecx - mov -22(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(18bytes): - mov -18(%eax), %ecx - mov -18(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(14bytes): - mov -14(%eax), %ecx - mov -14(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(10bytes): - mov -10(%eax), %ecx - mov -10(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(6bytes): - mov -6(%eax), %ecx - mov -6(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(2bytes): - movzwl -2(%eax), %ecx - movzwl -2(%edx), %ebx - cmp %bl, %cl - jne L(end) - cmp %bh, %ch - mov $0, %eax - jne L(end) - POP (%ebx) - ret - CFI_PUSH (%ebx) - - .p2align 4 -L(47bytes): - movl -47(%eax), %ecx - movl -47(%edx), 
%ebx - cmp %ebx, %ecx - jne L(find_diff) -L(43bytes): - movl -43(%eax), %ecx - movl -43(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(39bytes): - movl -39(%eax), %ecx - movl -39(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(35bytes): - movl -35(%eax), %ecx - movl -35(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(31bytes): - movl -31(%eax), %ecx - movl -31(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(27bytes): - movl -27(%eax), %ecx - movl -27(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(23bytes): - movl -23(%eax), %ecx - movl -23(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(19bytes): - movl -19(%eax), %ecx - movl -19(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(15bytes): - movl -15(%eax), %ecx - movl -15(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(11bytes): - movl -11(%eax), %ecx - movl -11(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(7bytes): - movl -7(%eax), %ecx - movl -7(%edx), %ebx - cmp %ebx, %ecx - jne L(find_diff) -L(3bytes): - movzwl -3(%eax), %ecx - movzwl -3(%edx), %ebx - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - movzbl -1(%eax), %eax - cmpb -1(%edx), %al - mov $0, %eax - jne L(end) - POP (%ebx) - ret - CFI_PUSH (%ebx) - - .p2align 4 -L(find_diff): - cmpb %bl, %cl - jne L(end) - cmp %bx, %cx - jne L(end) - shr $16,%ecx - shr $16,%ebx - cmp %bl, %cl - jne L(end) - cmp %bx, %cx - - .p2align 4 -L(end): - POP (%ebx) - mov $1, %eax - ja L(bigger) - neg %eax -L(bigger): - ret -# else - -/* for wmemcmp */ - .p2align 4 -L(find_diff): - POP (%ebx) - mov $1, %eax - jg L(find_diff_bigger) - neg %eax - ret - - .p2align 4 -L(find_diff_bigger): - ret - -# endif -END (MEMCMP) -#endif diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S deleted file mode 100644 index 1fc5994a17..0000000000 --- a/sysdeps/i386/i686/multiarch/memcmp.S +++ /dev/null @@ -1,62 +0,0 @@ -/* Multiple versions of memcmp - All versions must be listed in ifunc-impl-list.c. 
- Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. */ -#if IS_IN (libc) - .text -ENTRY(memcmp) - .type memcmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memcmp_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memcmp_ssse3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - LOAD_FUNC_GOT_EAX (__memcmp_sse4_2) -2: ret -END(memcmp) - -# undef ENTRY -# define ENTRY(name) \ - .type __memcmp_ia32, @function; \ - .p2align 4; \ - .globl __memcmp_ia32; \ - .hidden __memcmp_ia32; \ - __memcmp_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memcmp; __GI_memcmp = __memcmp_ia32 -# endif -#endif - -#include "../memcmp.S" diff --git a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S deleted file mode 100644 index 2fe2072cb1..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S +++ /dev/null @@ -1,681 +0,0 @@ -/* memcpy optimized with SSE2 unaligned memory access instructions. - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) \ - && (defined SHARED \ - || defined USE_AS_MEMMOVE \ - || !defined USE_MULTIARCH) - -# include <sysdep.h> -# include "asm-syntax.h" - -# ifndef MEMCPY -# define MEMCPY __memcpy_sse2_unaligned -# define MEMCPY_CHK __memcpy_chk_sse2_unaligned -# endif - -# ifdef USE_AS_BCOPY -# define SRC PARMS -# define DEST SRC+4 -# define LEN DEST+4 -# else -# define DEST PARMS -# define SRC DEST+4 -# define LEN SRC+4 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 /* Preserve EBX. 
*/ -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) - - .section .text.sse2,"ax",@progbits -# if !defined USE_AS_BCOPY -ENTRY (MEMCPY_CHK) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (MEMCPY_CHK) -# endif - -ENTRY (MEMCPY) - ENTRANCE - movl LEN(%esp), %ecx - movl SRC(%esp), %eax - movl DEST(%esp), %edx - cmp %edx, %eax - -# ifdef USE_AS_MEMMOVE - jg L(check_forward) - -L(mm_len_0_or_more_backward): -/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128] - separately. */ - cmp $16, %ecx - jbe L(mm_len_0_16_bytes_backward) - - cmpl $32, %ecx - jg L(mm_len_32_or_more_backward) - -/* Copy [0..32] and return. */ - movdqu (%eax), %xmm0 - movdqu -16(%eax, %ecx), %xmm1 - movdqu %xmm0, (%edx) - movdqu %xmm1, -16(%edx, %ecx) - jmp L(return) - -L(mm_len_32_or_more_backward): - cmpl $64, %ecx - jg L(mm_len_64_or_more_backward) - -/* Copy [0..64] and return. */ - movdqu (%eax), %xmm0 - movdqu 16(%eax), %xmm1 - movdqu -16(%eax, %ecx), %xmm2 - movdqu -32(%eax, %ecx), %xmm3 - movdqu %xmm0, (%edx) - movdqu %xmm1, 16(%edx) - movdqu %xmm2, -16(%edx, %ecx) - movdqu %xmm3, -32(%edx, %ecx) - jmp L(return) - -L(mm_len_64_or_more_backward): - cmpl $128, %ecx - jg L(mm_len_128_or_more_backward) - -/* Copy [0..128] and return. 
*/ - movdqu (%eax), %xmm0 - movdqu 16(%eax), %xmm1 - movdqu 32(%eax), %xmm2 - movdqu 48(%eax), %xmm3 - movdqu -64(%eax, %ecx), %xmm4 - movdqu -48(%eax, %ecx), %xmm5 - movdqu -32(%eax, %ecx), %xmm6 - movdqu -16(%eax, %ecx), %xmm7 - movdqu %xmm0, (%edx) - movdqu %xmm1, 16(%edx) - movdqu %xmm2, 32(%edx) - movdqu %xmm3, 48(%edx) - movdqu %xmm4, -64(%edx, %ecx) - movdqu %xmm5, -48(%edx, %ecx) - movdqu %xmm6, -32(%edx, %ecx) - movdqu %xmm7, -16(%edx, %ecx) - jmp L(return) - -L(mm_len_128_or_more_backward): - add %ecx, %eax - cmp %edx, %eax - movl SRC(%esp), %eax - jle L(forward) - PUSH (%esi) - PUSH (%edi) - PUSH (%ebx) - -/* Aligning the address of destination. */ - movdqu (%eax), %xmm4 - movdqu 16(%eax), %xmm5 - movdqu 32(%eax), %xmm6 - movdqu 48(%eax), %xmm7 - leal (%edx, %ecx), %esi - movdqu -16(%eax, %ecx), %xmm0 - subl $16, %esp - movdqu %xmm0, (%esp) - mov %ecx, %edi - movl %esi, %ecx - andl $-16, %ecx - leal (%ecx), %ebx - subl %edx, %ebx - leal (%eax, %ebx), %eax - shrl $6, %ebx - -# ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %edi -# else -# ifdef SHARED - PUSH (%ebx) - SETUP_PIC_REG (bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %edi - POP (%ebx) -# else - cmp __x86_shared_cache_size_half, %edi -# endif -# endif - jae L(mm_large_page_loop_backward) - - .p2align 4 -L(mm_main_loop_backward): - - prefetcht0 -128(%eax) - - movdqu -64(%eax), %xmm0 - movdqu -48(%eax), %xmm1 - movdqu -32(%eax), %xmm2 - movdqu -16(%eax), %xmm3 - movaps %xmm0, -64(%ecx) - subl $64, %eax - movaps %xmm1, -48(%ecx) - movaps %xmm2, -32(%ecx) - movaps %xmm3, -16(%ecx) - subl $64, %ecx - sub $1, %ebx - jnz L(mm_main_loop_backward) - movdqu (%esp), %xmm0 - addl $16, %esp - movdqu %xmm0, -16(%esi) - movdqu %xmm4, (%edx) - movdqu %xmm5, 16(%edx) - movdqu %xmm6, 32(%edx) - movdqu %xmm7, 48(%edx) - POP (%ebx) - jmp L(mm_return_pop_all) - -/* Copy [0..16] and return. 
*/ -L(mm_len_0_16_bytes_backward): - testb $24, %cl - jnz L(mm_len_9_16_bytes_backward) - testb $4, %cl - .p2align 4,,5 - jnz L(mm_len_5_8_bytes_backward) - testl %ecx, %ecx - .p2align 4,,2 - je L(return) - testb $2, %cl - .p2align 4,,1 - jne L(mm_len_3_4_bytes_backward) - movzbl -1(%eax,%ecx), %ebx - movzbl (%eax), %eax - movb %bl, -1(%edx,%ecx) - movb %al, (%edx) - jmp L(return) - -L(mm_len_3_4_bytes_backward): - movzwl -2(%eax,%ecx), %ebx - movzwl (%eax), %eax - movw %bx, -2(%edx,%ecx) - movw %ax, (%edx) - jmp L(return) - -L(mm_len_9_16_bytes_backward): - PUSH (%esi) - movl -4(%eax,%ecx), %ebx - movl -8(%eax,%ecx), %esi - movl %ebx, -4(%edx,%ecx) - movl %esi, -8(%edx,%ecx) - subl $8, %ecx - POP (%esi) - jmp L(mm_len_0_16_bytes_backward) - -L(mm_len_5_8_bytes_backward): - movl (%eax), %ebx - movl -4(%eax,%ecx), %eax - movl %ebx, (%edx) - movl %eax, -4(%edx,%ecx) - jmp L(return) - -/* Big length copy backward part. */ - .p2align 4 -L(mm_large_page_loop_backward): - movdqu -64(%eax), %xmm0 - movdqu -48(%eax), %xmm1 - movdqu -32(%eax), %xmm2 - movdqu -16(%eax), %xmm3 - movntdq %xmm0, -64(%ecx) - subl $64, %eax - movntdq %xmm1, -48(%ecx) - movntdq %xmm2, -32(%ecx) - movntdq %xmm3, -16(%ecx) - subl $64, %ecx - sub $1, %ebx - jnz L(mm_large_page_loop_backward) - sfence - movdqu (%esp), %xmm0 - addl $16, %esp - movdqu %xmm0, -16(%esi) - movdqu %xmm4, (%edx) - movdqu %xmm5, 16(%edx) - movdqu %xmm6, 32(%edx) - movdqu %xmm7, 48(%edx) - POP (%ebx) - jmp L(mm_return_pop_all) - -L(check_forward): - add %edx, %ecx - cmp %eax, %ecx - movl LEN(%esp), %ecx - jle L(forward) - -/* Now do checks for lengths. We do [0..16], [0..32], [0..64], [0..128] - separately. */ - cmp $16, %ecx - jbe L(mm_len_0_16_bytes_forward) - - cmpl $32, %ecx - ja L(mm_len_32_or_more_forward) - -/* Copy [0..32] and return. 
*/ - movdqu (%eax), %xmm0 - movdqu -16(%eax, %ecx), %xmm1 - movdqu %xmm0, (%edx) - movdqu %xmm1, -16(%edx, %ecx) - jmp L(return) - -L(mm_len_32_or_more_forward): - cmpl $64, %ecx - ja L(mm_len_64_or_more_forward) - -/* Copy [0..64] and return. */ - movdqu (%eax), %xmm0 - movdqu 16(%eax), %xmm1 - movdqu -16(%eax, %ecx), %xmm2 - movdqu -32(%eax, %ecx), %xmm3 - movdqu %xmm0, (%edx) - movdqu %xmm1, 16(%edx) - movdqu %xmm2, -16(%edx, %ecx) - movdqu %xmm3, -32(%edx, %ecx) - jmp L(return) - -L(mm_len_64_or_more_forward): - cmpl $128, %ecx - ja L(mm_len_128_or_more_forward) - -/* Copy [0..128] and return. */ - movdqu (%eax), %xmm0 - movdqu 16(%eax), %xmm1 - movdqu 32(%eax), %xmm2 - movdqu 48(%eax), %xmm3 - movdqu -64(%eax, %ecx), %xmm4 - movdqu -48(%eax, %ecx), %xmm5 - movdqu -32(%eax, %ecx), %xmm6 - movdqu -16(%eax, %ecx), %xmm7 - movdqu %xmm0, (%edx) - movdqu %xmm1, 16(%edx) - movdqu %xmm2, 32(%edx) - movdqu %xmm3, 48(%edx) - movdqu %xmm4, -64(%edx, %ecx) - movdqu %xmm5, -48(%edx, %ecx) - movdqu %xmm6, -32(%edx, %ecx) - movdqu %xmm7, -16(%edx, %ecx) - jmp L(return) - -L(mm_len_128_or_more_forward): - PUSH (%esi) - PUSH (%edi) - PUSH (%ebx) - -/* Aligning the address of destination. 
*/ - movdqu -16(%eax, %ecx), %xmm4 - movdqu -32(%eax, %ecx), %xmm5 - movdqu -48(%eax, %ecx), %xmm6 - movdqu -64(%eax, %ecx), %xmm7 - leal (%edx, %ecx), %esi - movdqu (%eax), %xmm0 - subl $16, %esp - movdqu %xmm0, (%esp) - mov %ecx, %edi - leal 16(%edx), %ecx - andl $-16, %ecx - movl %ecx, %ebx - subl %edx, %ebx - addl %ebx, %eax - movl %esi, %ebx - subl %ecx, %ebx - shrl $6, %ebx - -# ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %edi -# else -# ifdef SHARED - PUSH (%ebx) - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %edi - POP (%ebx) -# else - cmp __x86_shared_cache_size_half, %edi -# endif -# endif - jae L(mm_large_page_loop_forward) - - .p2align 4 -L(mm_main_loop_forward): - - prefetcht0 128(%eax) - - movdqu (%eax), %xmm0 - movdqu 16(%eax), %xmm1 - movdqu 32(%eax), %xmm2 - movdqu 48(%eax), %xmm3 - movdqa %xmm0, (%ecx) - addl $64, %eax - movaps %xmm1, 16(%ecx) - movaps %xmm2, 32(%ecx) - movaps %xmm3, 48(%ecx) - addl $64, %ecx - sub $1, %ebx - jnz L(mm_main_loop_forward) - movdqu (%esp), %xmm0 - addl $16, %esp - movdqu %xmm0, (%edx) - movdqu %xmm4, -16(%esi) - movdqu %xmm5, -32(%esi) - movdqu %xmm6, -48(%esi) - movdqu %xmm7, -64(%esi) - POP (%ebx) - jmp L(mm_return_pop_all) - -L(mm_len_0_16_bytes_forward): - testb $24, %cl - jne L(mm_len_9_16_bytes_forward) - testb $4, %cl - .p2align 4,,5 - jne L(mm_len_5_8_bytes_forward) - testl %ecx, %ecx - .p2align 4,,2 - je L(return) - testb $2, %cl - .p2align 4,,1 - jne L(mm_len_2_4_bytes_forward) - movzbl -1(%eax,%ecx), %ebx - movzbl (%eax), %eax - movb %bl, -1(%edx,%ecx) - movb %al, (%edx) - jmp L(return) - -L(mm_len_2_4_bytes_forward): - movzwl -2(%eax,%ecx), %ebx - movzwl (%eax), %eax - movw %bx, -2(%edx,%ecx) - movw %ax, (%edx) - jmp L(return) - -L(mm_len_5_8_bytes_forward): - movl (%eax), %ebx - movl -4(%eax,%ecx), %eax - movl %ebx, (%edx) - movl %eax, -4(%edx,%ecx) - jmp L(return) - -L(mm_len_9_16_bytes_forward): - movq (%eax), %xmm0 - movq 
-8(%eax, %ecx), %xmm1 - movq %xmm0, (%edx) - movq %xmm1, -8(%edx, %ecx) - jmp L(return) - -L(mm_return_pop_all): - movl %edx, %eax - POP (%edi) - POP (%esi) - RETURN - -/* Big length copy forward part. */ - .p2align 4 -L(mm_large_page_loop_forward): - movdqu (%eax), %xmm0 - movdqu 16(%eax), %xmm1 - movdqu 32(%eax), %xmm2 - movdqu 48(%eax), %xmm3 - movntdq %xmm0, (%ecx) - addl $64, %eax - movntdq %xmm1, 16(%ecx) - movntdq %xmm2, 32(%ecx) - movntdq %xmm3, 48(%ecx) - addl $64, %ecx - sub $1, %ebx - jnz L(mm_large_page_loop_forward) - sfence - movdqu (%esp), %xmm0 - addl $16, %esp - movdqu %xmm0, (%edx) - movdqu %xmm4, -16(%esi) - movdqu %xmm5, -32(%esi) - movdqu %xmm6, -48(%esi) - movdqu %xmm7, -64(%esi) - POP (%ebx) - jmp L(mm_return_pop_all) -# endif - -L(forward): - cmp $16, %ecx - jbe L(len_0_16_bytes) - -# ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_shared_cache_size_half, %ecx -# endif -# endif - jae L(large_page) - - movdqu (%eax), %xmm0 - movdqu -16(%eax, %ecx), %xmm1 - cmpl $32, %ecx - movdqu %xmm0, (%edx) - movdqu %xmm1, -16(%edx, %ecx) - jbe L(return) - - movdqu 16(%eax), %xmm0 - movdqu -32(%eax, %ecx), %xmm1 - cmpl $64, %ecx - movdqu %xmm0, 16(%edx) - movdqu %xmm1, -32(%edx, %ecx) - jbe L(return) - - movdqu 32(%eax), %xmm0 - movdqu 48(%eax), %xmm1 - movdqu -48(%eax, %ecx), %xmm2 - movdqu -64(%eax, %ecx), %xmm3 - cmpl $128, %ecx - movdqu %xmm0, 32(%edx) - movdqu %xmm1, 48(%edx) - movdqu %xmm2, -48(%edx, %ecx) - movdqu %xmm3, -64(%edx, %ecx) - jbe L(return) - -/* Now the main loop: we align the address of the destination. */ - leal 64(%edx), %ebx - andl $-64, %ebx - - addl %edx, %ecx - andl $-64, %ecx - - subl %edx, %eax - -/* We should stop two iterations before the termination - (in order not to misprefetch). 
*/ - subl $64, %ecx - cmpl %ebx, %ecx - je L(main_loop_just_one_iteration) - - subl $64, %ecx - cmpl %ebx, %ecx - je L(main_loop_last_two_iterations) - - .p2align 4 -L(main_loop_cache): - - prefetcht0 128(%ebx, %eax) - - movdqu (%ebx, %eax), %xmm0 - movdqu 16(%ebx, %eax), %xmm1 - movdqu 32(%ebx, %eax), %xmm2 - movdqu 48(%ebx, %eax), %xmm3 - movdqa %xmm0, (%ebx) - movaps %xmm1, 16(%ebx) - movaps %xmm2, 32(%ebx) - movaps %xmm3, 48(%ebx) - lea 64(%ebx), %ebx - cmpl %ebx, %ecx - jne L(main_loop_cache) - -L(main_loop_last_two_iterations): - movdqu (%ebx, %eax), %xmm0 - movdqu 16(%ebx, %eax), %xmm1 - movdqu 32(%ebx, %eax), %xmm2 - movdqu 48(%ebx, %eax), %xmm3 - movdqu 64(%ebx, %eax), %xmm4 - movdqu 80(%ebx, %eax), %xmm5 - movdqu 96(%ebx, %eax), %xmm6 - movdqu 112(%ebx, %eax), %xmm7 - movdqa %xmm0, (%ebx) - movaps %xmm1, 16(%ebx) - movaps %xmm2, 32(%ebx) - movaps %xmm3, 48(%ebx) - movaps %xmm4, 64(%ebx) - movaps %xmm5, 80(%ebx) - movaps %xmm6, 96(%ebx) - movaps %xmm7, 112(%ebx) - jmp L(return) - -L(main_loop_just_one_iteration): - movdqu (%ebx, %eax), %xmm0 - movdqu 16(%ebx, %eax), %xmm1 - movdqu 32(%ebx, %eax), %xmm2 - movdqu 48(%ebx, %eax), %xmm3 - movdqa %xmm0, (%ebx) - movaps %xmm1, 16(%ebx) - movaps %xmm2, 32(%ebx) - movaps %xmm3, 48(%ebx) - jmp L(return) - -L(large_page): - movdqu (%eax), %xmm0 - movdqu 16(%eax), %xmm1 - movdqu 32(%eax), %xmm2 - movdqu 48(%eax), %xmm3 - movdqu -64(%eax, %ecx), %xmm4 - movdqu -48(%eax, %ecx), %xmm5 - movdqu -32(%eax, %ecx), %xmm6 - movdqu -16(%eax, %ecx), %xmm7 - movdqu %xmm0, (%edx) - movdqu %xmm1, 16(%edx) - movdqu %xmm2, 32(%edx) - movdqu %xmm3, 48(%edx) - movdqu %xmm4, -64(%edx, %ecx) - movdqu %xmm5, -48(%edx, %ecx) - movdqu %xmm6, -32(%edx, %ecx) - movdqu %xmm7, -16(%edx, %ecx) - - movdqu 64(%eax), %xmm0 - movdqu 80(%eax), %xmm1 - movdqu 96(%eax), %xmm2 - movdqu 112(%eax), %xmm3 - movdqu -128(%eax, %ecx), %xmm4 - movdqu -112(%eax, %ecx), %xmm5 - movdqu -96(%eax, %ecx), %xmm6 - movdqu -80(%eax, %ecx), %xmm7 - movdqu %xmm0, 
64(%edx) - movdqu %xmm1, 80(%edx) - movdqu %xmm2, 96(%edx) - movdqu %xmm3, 112(%edx) - movdqu %xmm4, -128(%edx, %ecx) - movdqu %xmm5, -112(%edx, %ecx) - movdqu %xmm6, -96(%edx, %ecx) - movdqu %xmm7, -80(%edx, %ecx) - -/* Now the main loop with non temporal stores. We align - the address of the destination. */ - leal 128(%edx), %ebx - andl $-128, %ebx - - addl %edx, %ecx - andl $-128, %ecx - - subl %edx, %eax - - .p2align 4 -L(main_loop_large_page): - movdqu (%ebx, %eax), %xmm0 - movdqu 16(%ebx, %eax), %xmm1 - movdqu 32(%ebx, %eax), %xmm2 - movdqu 48(%ebx, %eax), %xmm3 - movdqu 64(%ebx, %eax), %xmm4 - movdqu 80(%ebx, %eax), %xmm5 - movdqu 96(%ebx, %eax), %xmm6 - movdqu 112(%ebx, %eax), %xmm7 - movntdq %xmm0, (%ebx) - movntdq %xmm1, 16(%ebx) - movntdq %xmm2, 32(%ebx) - movntdq %xmm3, 48(%ebx) - movntdq %xmm4, 64(%ebx) - movntdq %xmm5, 80(%ebx) - movntdq %xmm6, 96(%ebx) - movntdq %xmm7, 112(%ebx) - lea 128(%ebx), %ebx - cmpl %ebx, %ecx - jne L(main_loop_large_page) - sfence - jmp L(return) - -L(len_0_16_bytes): - testb $24, %cl - jne L(len_9_16_bytes) - testb $4, %cl - .p2align 4,,5 - jne L(len_5_8_bytes) - testl %ecx, %ecx - .p2align 4,,2 - je L(return) - movzbl (%eax), %ebx - testb $2, %cl - movb %bl, (%edx) - je L(return) - movzwl -2(%eax,%ecx), %ebx - movw %bx, -2(%edx,%ecx) - jmp L(return) - -L(len_9_16_bytes): - movq (%eax), %xmm0 - movq -8(%eax, %ecx), %xmm1 - movq %xmm0, (%edx) - movq %xmm1, -8(%edx, %ecx) - jmp L(return) - -L(len_5_8_bytes): - movl (%eax), %ebx - movl %ebx, (%edx) - movl -4(%eax,%ecx), %ebx - movl %ebx, -4(%edx,%ecx) - -L(return): - movl %edx, %eax -# if !defined USE_AS_BCOPY && defined USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif - RETURN - -END (MEMCPY) -#endif diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S deleted file mode 100644 index 687e083147..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S +++ /dev/null @@ -1,1809 +0,0 @@ -/* memcpy with 
SSSE3 and REP string. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -#if IS_IN (libc) \ - && (defined SHARED \ - || defined USE_AS_MEMMOVE \ - || !defined USE_MULTIARCH) - -#include "asm-syntax.h" - -#ifndef MEMCPY -# define MEMCPY __memcpy_ssse3_rep -# define MEMCPY_CHK __memcpy_chk_ssse3_rep -#endif - -#ifdef USE_AS_BCOPY -# define SRC PARMS -# define DEST SRC+4 -# define LEN DEST+4 -#else -# define DEST PARMS -# define SRC DEST+4 -# define LEN SRC+4 -#endif - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef SHARED -# define PARMS 8 /* Preserve EBX. */ -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into EBX. 
*/ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ebx,INDEX,SCALE), %ebx; \ - /* We loaded the jump table. Go. */ \ - jmp *%ebx - -# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \ - addl $(TABLE - .), %ebx - -# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ - addl (%ebx,INDEX,SCALE), %ebx; \ - /* We loaded the jump table. Go. */ \ - jmp *%ebx -#else -# define PARMS 4 -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) - -# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) - -# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -#endif - - .section .text.ssse3,"ax",@progbits -#if !defined USE_AS_BCOPY -ENTRY (MEMCPY_CHK) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (MEMCPY_CHK) -#endif -ENTRY (MEMCPY) - ENTRANCE - movl LEN(%esp), %ecx - movl SRC(%esp), %eax - movl DEST(%esp), %edx - -#ifdef USE_AS_MEMMOVE - cmp %eax, %edx - jb L(copy_forward) - je L(fwd_write_0bytes) - cmp $48, %ecx - jb L(bk_write_less48bytes) - add %ecx, %eax - cmp %eax, %edx - movl SRC(%esp), %eax - jb L(copy_backward) - -L(copy_forward): -#endif - cmp $48, %ecx - jae L(48bytesormore) - -L(fwd_write_less32bytes): -#ifndef USE_AS_MEMMOVE - cmp %dl, %al - jb L(bk_write) -#endif - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -#ifndef USE_AS_MEMMOVE -L(bk_write): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) -#endif - - ALIGN (4) -/* ECX > 32 and EDX is 4 byte aligned. 
*/ -L(48bytesormore): - movdqu (%eax), %xmm0 - PUSH (%edi) - movl %edx, %edi - and $-16, %edx - PUSH (%esi) - cfi_remember_state - add $16, %edx - movl %edi, %esi - sub %edx, %edi - add %edi, %ecx - sub %edi, %eax - -#ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %ecx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_shared_cache_size_half, %ecx -# endif -#endif - - mov %eax, %edi - jae L(large_page) - and $0xf, %edi - jz L(shl_0) - - BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) - - ALIGN (4) -L(shl_0): - movdqu %xmm0, (%esi) - xor %edi, %edi - cmp $127, %ecx - ja L(shl_0_gobble) - lea -32(%ecx), %ecx -L(shl_0_loop): - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi -L(shl_0_end): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - add %edi, %eax - POP (%esi) - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state -L(shl_0_gobble): - -#ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi -# else - mov __x86_data_cache_size_half, %edi -# endif -#endif - mov %edi, %esi - shr $3, %esi - sub %esi, %edi - cmp %edi, %ecx - jae 
L(shl_0_gobble_mem_start) - sub $128, %ecx - ALIGN (4) -L(shl_0_gobble_cache_loop): - movdqa (%eax), %xmm0 - movaps 0x10(%eax), %xmm1 - movaps 0x20(%eax), %xmm2 - movaps 0x30(%eax), %xmm3 - movaps 0x40(%eax), %xmm4 - movaps 0x50(%eax), %xmm5 - movaps 0x60(%eax), %xmm6 - movaps 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - sub $128, %ecx - movdqa %xmm0, (%edx) - movaps %xmm1, 0x10(%edx) - movaps %xmm2, 0x20(%edx) - movaps %xmm3, 0x30(%edx) - movaps %xmm4, 0x40(%edx) - movaps %xmm5, 0x50(%edx) - movaps %xmm6, 0x60(%edx) - movaps %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_cache_loop) - add $0x80, %ecx - cmp $0x40, %ecx - jb L(shl_0_cache_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), %xmm1 - add $0x40, %eax - - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx -L(shl_0_cache_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_cache_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx -L(shl_0_cache_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_cache_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - movdqa %xmm0, (%edx) - add $0x10, %edx -L(shl_0_cache_less_16bytes): - add %ecx, %edx - add %ecx, %eax - POP (%esi) - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_0_gobble_mem_start): - cmp %al, %dl - je L(copy_page_by_rep) - sub $128, %ecx -L(shl_0_gobble_mem_loop): - prefetchnta 0x1c0(%eax) - prefetchnta 0x280(%eax) - prefetchnta 0x1c0(%edx) - prefetchnta 0x280(%edx) - - movdqa (%eax), %xmm0 - movaps 0x10(%eax), %xmm1 - movaps 0x20(%eax), %xmm2 - movaps 0x30(%eax), %xmm3 - movaps 0x40(%eax), %xmm4 - movaps 0x50(%eax), %xmm5 - movaps 0x60(%eax), %xmm6 - movaps 0x70(%eax), %xmm7 - lea 
0x80(%eax), %eax - sub $0x80, %ecx - movdqa %xmm0, (%edx) - movaps %xmm1, 0x10(%edx) - movaps %xmm2, 0x20(%edx) - movaps %xmm3, 0x30(%edx) - movaps %xmm4, 0x40(%edx) - movaps %xmm5, 0x50(%edx) - movaps %xmm6, 0x60(%edx) - movaps %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_mem_loop) - add $0x80, %ecx - cmp $0x40, %ecx - jb L(shl_0_mem_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), %xmm1 - add $0x40, %eax - - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx -L(shl_0_mem_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_mem_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx -L(shl_0_mem_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_mem_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - movdqa %xmm0, (%edx) - add $0x10, %edx -L(shl_0_mem_less_16bytes): - add %ecx, %edx - add %ecx, %eax - POP (%esi) - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_1): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $1, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_1_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_1_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_1_loop) - -L(shl_1_end): - add $32, %ecx - add 
%ecx, %edi - add %edi, %edx - lea 1(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_2): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $2, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_2_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_2_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_2_loop) - -L(shl_2_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 2(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_3): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $3, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_3_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_3_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_3_loop) - -L(shl_3_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 3(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - 
cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_4): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $4, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_4_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_4_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_4_loop) - -L(shl_4_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 4(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_5): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $5, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_5_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_5_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_5_loop) - -L(shl_5_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 5(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_6): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $6, %eax - 
movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_6_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_6_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_6_loop) - -L(shl_6_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 6(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_7): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $7, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_7_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_7_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_7_loop) - -L(shl_7_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 7(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_8): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $8, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_8_loop): - - movdqa 16(%eax, %edi), 
%xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_8_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_8_loop) - -L(shl_8_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 8(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_9): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $9, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_9_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_9_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_9_loop) - -L(shl_9_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 9(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_10): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $10, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_10_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm1, %xmm2 
- lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_10_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_10_loop) - -L(shl_10_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 10(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_11): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $11, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_11_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_11_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_11_loop) - -L(shl_11_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 11(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_12): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $12, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_12_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_12_end) - - 
movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_12_loop) - -L(shl_12_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 12(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_13): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $13, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_13_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_13_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_13_loop) - -L(shl_13_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 13(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_14): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $14, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_14_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_14_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr 
$14, %xmm2, %xmm3 - palignr $14, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_14_loop) - -L(shl_14_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 14(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(shl_15): - BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - sub $15, %eax - movaps (%eax), %xmm1 - xor %edi, %edi - sub $32, %ecx - movdqu %xmm0, (%esi) - POP (%esi) -L(shl_15_loop): - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(shl_15_end) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(shl_15_loop) - -L(shl_15_end): - add $32, %ecx - add %ecx, %edi - add %edi, %edx - lea 15(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - - - ALIGN (4) -L(fwd_write_44bytes): - movl -44(%eax), %ecx - movl %ecx, -44(%edx) -L(fwd_write_40bytes): - movl -40(%eax), %ecx - movl %ecx, -40(%edx) -L(fwd_write_36bytes): - movl -36(%eax), %ecx - movl %ecx, -36(%edx) -L(fwd_write_32bytes): - movl -32(%eax), %ecx - movl %ecx, -32(%edx) -L(fwd_write_28bytes): - movl -28(%eax), %ecx - movl %ecx, -28(%edx) -L(fwd_write_24bytes): - movl -24(%eax), %ecx - movl %ecx, -24(%edx) -L(fwd_write_20bytes): - movl -20(%eax), %ecx - movl %ecx, -20(%edx) -L(fwd_write_16bytes): - movl -16(%eax), %ecx - movl %ecx, -16(%edx) -L(fwd_write_12bytes): - movl -12(%eax), %ecx - movl %ecx, -12(%edx) -L(fwd_write_8bytes): - movl -8(%eax), %ecx - movl %ecx, -8(%edx) -L(fwd_write_4bytes): - movl 
-4(%eax), %ecx - movl %ecx, -4(%edx) -L(fwd_write_0bytes): -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_5bytes): - movl -5(%eax), %ecx - movl -4(%eax), %eax - movl %ecx, -5(%edx) - movl %eax, -4(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_45bytes): - movl -45(%eax), %ecx - movl %ecx, -45(%edx) -L(fwd_write_41bytes): - movl -41(%eax), %ecx - movl %ecx, -41(%edx) -L(fwd_write_37bytes): - movl -37(%eax), %ecx - movl %ecx, -37(%edx) -L(fwd_write_33bytes): - movl -33(%eax), %ecx - movl %ecx, -33(%edx) -L(fwd_write_29bytes): - movl -29(%eax), %ecx - movl %ecx, -29(%edx) -L(fwd_write_25bytes): - movl -25(%eax), %ecx - movl %ecx, -25(%edx) -L(fwd_write_21bytes): - movl -21(%eax), %ecx - movl %ecx, -21(%edx) -L(fwd_write_17bytes): - movl -17(%eax), %ecx - movl %ecx, -17(%edx) -L(fwd_write_13bytes): - movl -13(%eax), %ecx - movl %ecx, -13(%edx) -L(fwd_write_9bytes): - movl -9(%eax), %ecx - movl %ecx, -9(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) -L(fwd_write_1bytes): - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_46bytes): - movl -46(%eax), %ecx - movl %ecx, -46(%edx) -L(fwd_write_42bytes): - movl -42(%eax), %ecx - movl %ecx, -42(%edx) -L(fwd_write_38bytes): - movl -38(%eax), %ecx - movl %ecx, -38(%edx) -L(fwd_write_34bytes): - movl -34(%eax), %ecx - movl %ecx, -34(%edx) -L(fwd_write_30bytes): - movl -30(%eax), %ecx - movl %ecx, -30(%edx) -L(fwd_write_26bytes): - movl -26(%eax), %ecx - movl %ecx, -26(%edx) -L(fwd_write_22bytes): - movl -22(%eax), %ecx - movl %ecx, -22(%edx) -L(fwd_write_18bytes): - movl -18(%eax), %ecx - movl %ecx, -18(%edx) -L(fwd_write_14bytes): - movl -14(%eax), %ecx - movl %ecx, -14(%edx) 
-L(fwd_write_10bytes): - movl -10(%eax), %ecx - movl %ecx, -10(%edx) -L(fwd_write_6bytes): - movl -6(%eax), %ecx - movl %ecx, -6(%edx) -L(fwd_write_2bytes): - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN - - ALIGN (4) -L(fwd_write_47bytes): - movl -47(%eax), %ecx - movl %ecx, -47(%edx) -L(fwd_write_43bytes): - movl -43(%eax), %ecx - movl %ecx, -43(%edx) -L(fwd_write_39bytes): - movl -39(%eax), %ecx - movl %ecx, -39(%edx) -L(fwd_write_35bytes): - movl -35(%eax), %ecx - movl %ecx, -35(%edx) -L(fwd_write_31bytes): - movl -31(%eax), %ecx - movl %ecx, -31(%edx) -L(fwd_write_27bytes): - movl -27(%eax), %ecx - movl %ecx, -27(%edx) -L(fwd_write_23bytes): - movl -23(%eax), %ecx - movl %ecx, -23(%edx) -L(fwd_write_19bytes): - movl -19(%eax), %ecx - movl %ecx, -19(%edx) -L(fwd_write_15bytes): - movl -15(%eax), %ecx - movl %ecx, -15(%edx) -L(fwd_write_11bytes): - movl -11(%eax), %ecx - movl %ecx, -11(%edx) -L(fwd_write_7bytes): - movl -7(%eax), %ecx - movl %ecx, -7(%edx) -L(fwd_write_3bytes): - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -#ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -#endif - RETURN_END - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(large_page): - movdqu (%eax), %xmm1 - movdqu %xmm0, (%esi) - movntdq %xmm1, (%edx) - add $0x10, %eax - add $0x10, %edx - sub $0x10, %ecx - cmp %al, %dl - je L(copy_page_by_rep) -L(large_page_loop_init): - POP (%esi) - sub $0x80, %ecx - POP (%edi) -L(large_page_loop): - prefetchnta 0x1c0(%eax) - prefetchnta 0x280(%eax) - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - movdqu 0x40(%eax), %xmm4 - movdqu 0x50(%eax), %xmm5 - movdqu 0x60(%eax), %xmm6 - movdqu 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - lfence - sub $0x80, %ecx - movntdq %xmm0, (%edx) - 
movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - movntdq %xmm4, 0x40(%edx) - movntdq %xmm5, 0x50(%edx) - movntdq %xmm6, 0x60(%edx) - movntdq %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - jae L(large_page_loop) - add $0x80, %ecx - cmp $0x40, %ecx - jb L(large_page_less_64bytes) - - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - lea 0x40(%eax), %eax - - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - lea 0x40(%edx), %edx - sub $0x40, %ecx -L(large_page_less_64bytes): - cmp $32, %ecx - jb L(large_page_less_32bytes) - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - lea 0x20(%eax), %eax - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - lea 0x20(%edx), %edx - sub $0x20, %ecx -L(large_page_less_32bytes): - add %ecx, %edx - add %ecx, %eax - sfence - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - cfi_restore_state - cfi_remember_state - ALIGN (4) -L(copy_page_by_rep): - mov %eax, %esi - mov %edx, %edi - mov %ecx, %edx - shr $2, %ecx - and $3, %edx - rep movsl - jz L(copy_page_by_rep_exit) - cmp $2, %edx - jb L(copy_page_by_rep_left_1) - movzwl (%esi), %eax - movw %ax, (%edi) - add $2, %esi - add $2, %edi - sub $2, %edx - jz L(copy_page_by_rep_exit) -L(copy_page_by_rep_left_1): - movzbl (%esi), %eax - movb %al, (%edi) -L(copy_page_by_rep_exit): - POP (%esi) - POP (%edi) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_44bytes): - movl 40(%eax), %ecx - movl %ecx, 40(%edx) -L(bk_write_40bytes): - movl 36(%eax), %ecx - movl %ecx, 36(%edx) -L(bk_write_36bytes): - movl 32(%eax), %ecx - movl %ecx, 32(%edx) -L(bk_write_32bytes): - movl 28(%eax), %ecx - movl %ecx, 28(%edx) -L(bk_write_28bytes): - movl 24(%eax), %ecx - movl %ecx, 24(%edx) -L(bk_write_24bytes): - movl 20(%eax), %ecx - movl %ecx, 20(%edx) 
-L(bk_write_20bytes): - movl 16(%eax), %ecx - movl %ecx, 16(%edx) -L(bk_write_16bytes): - movl 12(%eax), %ecx - movl %ecx, 12(%edx) -L(bk_write_12bytes): - movl 8(%eax), %ecx - movl %ecx, 8(%edx) -L(bk_write_8bytes): - movl 4(%eax), %ecx - movl %ecx, 4(%edx) -L(bk_write_4bytes): - movl (%eax), %ecx - movl %ecx, (%edx) -L(bk_write_0bytes): -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_45bytes): - movl 41(%eax), %ecx - movl %ecx, 41(%edx) -L(bk_write_41bytes): - movl 37(%eax), %ecx - movl %ecx, 37(%edx) -L(bk_write_37bytes): - movl 33(%eax), %ecx - movl %ecx, 33(%edx) -L(bk_write_33bytes): - movl 29(%eax), %ecx - movl %ecx, 29(%edx) -L(bk_write_29bytes): - movl 25(%eax), %ecx - movl %ecx, 25(%edx) -L(bk_write_25bytes): - movl 21(%eax), %ecx - movl %ecx, 21(%edx) -L(bk_write_21bytes): - movl 17(%eax), %ecx - movl %ecx, 17(%edx) -L(bk_write_17bytes): - movl 13(%eax), %ecx - movl %ecx, 13(%edx) -L(bk_write_13bytes): - movl 9(%eax), %ecx - movl %ecx, 9(%edx) -L(bk_write_9bytes): - movl 5(%eax), %ecx - movl %ecx, 5(%edx) -L(bk_write_5bytes): - movl 1(%eax), %ecx - movl %ecx, 1(%edx) -L(bk_write_1bytes): - movzbl (%eax), %ecx - movb %cl, (%edx) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_46bytes): - movl 42(%eax), %ecx - movl %ecx, 42(%edx) -L(bk_write_42bytes): - movl 38(%eax), %ecx - movl %ecx, 38(%edx) -L(bk_write_38bytes): - movl 34(%eax), %ecx - movl %ecx, 34(%edx) -L(bk_write_34bytes): - movl 30(%eax), %ecx - movl %ecx, 30(%edx) -L(bk_write_30bytes): - movl 26(%eax), %ecx - movl %ecx, 26(%edx) -L(bk_write_26bytes): - movl 22(%eax), %ecx - movl %ecx, 22(%edx) -L(bk_write_22bytes): - movl 18(%eax), %ecx - movl %ecx, 18(%edx) -L(bk_write_18bytes): - movl 14(%eax), %ecx - movl %ecx, 14(%edx) -L(bk_write_14bytes): - movl 10(%eax), %ecx - movl 
%ecx, 10(%edx) -L(bk_write_10bytes): - movl 6(%eax), %ecx - movl %ecx, 6(%edx) -L(bk_write_6bytes): - movl 2(%eax), %ecx - movl %ecx, 2(%edx) -L(bk_write_2bytes): - movzwl (%eax), %ecx - movw %cx, (%edx) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN - - ALIGN (4) -L(bk_write_47bytes): - movl 43(%eax), %ecx - movl %ecx, 43(%edx) -L(bk_write_43bytes): - movl 39(%eax), %ecx - movl %ecx, 39(%edx) -L(bk_write_39bytes): - movl 35(%eax), %ecx - movl %ecx, 35(%edx) -L(bk_write_35bytes): - movl 31(%eax), %ecx - movl %ecx, 31(%edx) -L(bk_write_31bytes): - movl 27(%eax), %ecx - movl %ecx, 27(%edx) -L(bk_write_27bytes): - movl 23(%eax), %ecx - movl %ecx, 23(%edx) -L(bk_write_23bytes): - movl 19(%eax), %ecx - movl %ecx, 19(%edx) -L(bk_write_19bytes): - movl 15(%eax), %ecx - movl %ecx, 15(%edx) -L(bk_write_15bytes): - movl 11(%eax), %ecx - movl %ecx, 11(%edx) -L(bk_write_11bytes): - movl 7(%eax), %ecx - movl %ecx, 7(%edx) -L(bk_write_7bytes): - movl 3(%eax), %ecx - movl %ecx, 3(%edx) -L(bk_write_3bytes): - movzwl 1(%eax), %ecx - movw %cx, 1(%edx) - movzbl (%eax), %eax - movb %al, (%edx) -#ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -#endif - RETURN_END - - - .pushsection .rodata.ssse3,"a",@progbits - ALIGN (2) -L(table_48bytes_fwd): - .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd)) - .int JMPTBL 
(L(fwd_write_10bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd)) - .int 
JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd)) - - ALIGN (2) -L(shl_table): - .int JMPTBL (L(shl_0), L(shl_table)) - .int JMPTBL (L(shl_1), L(shl_table)) - .int JMPTBL (L(shl_2), L(shl_table)) - .int JMPTBL (L(shl_3), L(shl_table)) - .int JMPTBL (L(shl_4), L(shl_table)) - .int JMPTBL (L(shl_5), L(shl_table)) - .int JMPTBL (L(shl_6), L(shl_table)) - .int JMPTBL (L(shl_7), L(shl_table)) - .int JMPTBL (L(shl_8), L(shl_table)) - .int JMPTBL (L(shl_9), L(shl_table)) - .int JMPTBL (L(shl_10), L(shl_table)) - .int JMPTBL (L(shl_11), L(shl_table)) - .int JMPTBL (L(shl_12), L(shl_table)) - .int JMPTBL (L(shl_13), L(shl_table)) - .int JMPTBL (L(shl_14), L(shl_table)) - .int JMPTBL (L(shl_15), L(shl_table)) - - ALIGN (2) -L(table_48_bytes_bwd): - .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd)) - .int JMPTBL 
(L(bk_write_18bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd)) - - .popsection - -#ifdef USE_AS_MEMMOVE - ALIGN (4) -L(copy_backward): - PUSH (%esi) - movl %eax, %esi - add %ecx, %edx - add %ecx, %esi - testl $0x3, %edx - jnz L(bk_align) - -L(bk_aligned_4): - cmp $64, %ecx - jae L(bk_write_more64bytes) - 
-L(bk_write_64bytesless): - cmp $32, %ecx - jb L(bk_write_less32bytes) - -L(bk_write_more32bytes): - /* Copy 32 bytes at a time. */ - sub $32, %ecx - movl -4(%esi), %eax - movl %eax, -4(%edx) - movl -8(%esi), %eax - movl %eax, -8(%edx) - movl -12(%esi), %eax - movl %eax, -12(%edx) - movl -16(%esi), %eax - movl %eax, -16(%edx) - movl -20(%esi), %eax - movl %eax, -20(%edx) - movl -24(%esi), %eax - movl %eax, -24(%edx) - movl -28(%esi), %eax - movl %eax, -28(%edx) - movl -32(%esi), %eax - movl %eax, -32(%edx) - sub $32, %edx - sub $32, %esi - -L(bk_write_less32bytes): - movl %esi, %eax - sub %ecx, %edx - sub %ecx, %eax - POP (%esi) -L(bk_write_less48bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) - - CFI_PUSH (%esi) - ALIGN (4) -L(bk_align): - cmp $8, %ecx - jbe L(bk_write_less32bytes) - testl $1, %edx - /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, - then (EDX & 2) must be != 0. */ - jz L(bk_got2) - sub $1, %esi - sub $1, %ecx - sub $1, %edx - movzbl (%esi), %eax - movb %al, (%edx) - - testl $2, %edx - jz L(bk_aligned_4) - -L(bk_got2): - sub $2, %esi - sub $2, %ecx - sub $2, %edx - movzwl (%esi), %eax - movw %ax, (%edx) - jmp L(bk_aligned_4) - - ALIGN (4) -L(bk_write_more64bytes): - /* Check alignment of last byte. */ - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - -/* EDX is aligned 4 bytes, but not 16 bytes. 
*/ -L(bk_ssse3_align): - sub $4, %esi - sub $4, %ecx - sub $4, %edx - movl (%esi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %esi - sub $4, %ecx - sub $4, %edx - movl (%esi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %esi - sub $4, %ecx - sub $4, %edx - movl (%esi), %eax - movl %eax, (%edx) - -L(bk_ssse3_cpy_pre): - cmp $64, %ecx - jb L(bk_write_more32bytes) - -L(bk_ssse3_cpy): - sub $64, %esi - sub $64, %ecx - sub $64, %edx - movdqu 0x30(%esi), %xmm3 - movdqa %xmm3, 0x30(%edx) - movdqu 0x20(%esi), %xmm2 - movdqa %xmm2, 0x20(%edx) - movdqu 0x10(%esi), %xmm1 - movdqa %xmm1, 0x10(%edx) - movdqu (%esi), %xmm0 - movdqa %xmm0, (%edx) - cmp $64, %ecx - jae L(bk_ssse3_cpy) - jmp L(bk_write_64bytesless) - -#endif - -END (MEMCPY) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S deleted file mode 100644 index 53e8a6ca1d..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S +++ /dev/null @@ -1,3162 +0,0 @@ -/* memcpy with SSSE3 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) \ - && (defined SHARED \ - || defined USE_AS_MEMMOVE \ - || !defined USE_MULTIARCH) - -# include <sysdep.h> -# include "asm-syntax.h" - -# ifndef MEMCPY -# define MEMCPY __memcpy_ssse3 -# define MEMCPY_CHK __memcpy_chk_ssse3 -# endif - -# ifdef USE_AS_BCOPY -# define SRC PARMS -# define DEST SRC+4 -# define LEN DEST+4 -# else -# define DEST PARMS -# define SRC DEST+4 -# define LEN SRC+4 -# endif - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifdef SHARED -# define PARMS 8 /* Preserve EBX. */ -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ebx, INDEX, SCALE), %ebx; \ - /* We loaded the jump table. Go. */ \ - jmp *%ebx -# else - -# define PARMS 4 -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(, INDEX, SCALE) -# endif - - .section .text.ssse3,"ax",@progbits -# if !defined USE_AS_BCOPY -ENTRY (MEMCPY_CHK) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (MEMCPY_CHK) -# endif -ENTRY (MEMCPY) - ENTRANCE - movl LEN(%esp), %ecx - movl SRC(%esp), %eax - movl DEST(%esp), %edx - -# ifdef USE_AS_MEMMOVE - cmp %eax, %edx - jb L(copy_forward) - je L(fwd_write_0bytes) - cmp $32, %ecx - jae L(memmove_bwd) - jmp L(bk_write_less32bytes_2) - - .p2align 4 -L(memmove_bwd): - add %ecx, %eax - cmp %eax, %edx - movl SRC(%esp), %eax - jb L(copy_backward) - -L(copy_forward): -# endif - cmp $48, %ecx - jae L(48bytesormore) - -L(fwd_write_less32bytes): -# ifndef USE_AS_MEMMOVE - cmp %dl, %al - jb L(bk_write) -# endif - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) -# ifndef USE_AS_MEMMOVE - .p2align 4 -L(bk_write): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) -# endif - - .p2align 4 -L(48bytesormore): -# ifndef USE_AS_MEMMOVE - movlpd (%eax), %xmm0 - movlpd 8(%eax), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) -# else - movdqu (%eax), %xmm0 -# endif - PUSH (%edi) - movl %edx, %edi - and $-16, %edx - add $16, %edx - sub %edx, %edi - add %edi, %ecx - sub %edi, %eax - -# ifdef SHARED_CACHE_SIZE_HALF - cmp $SHARED_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_shared_cache_size_half, %ecx -# endif -# endif - - mov %eax, %edi - jae L(large_page) - and $0xf, %edi - jz L(shl_0) - BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) - - .p2align 4 -L(shl_0): -# ifdef USE_AS_MEMMOVE - movl DEST+4(%esp), %edi - movdqu %xmm0, (%edi) -# endif - xor %edi, %edi - cmp $127, %ecx - ja L(shl_0_gobble) - lea -32(%ecx), %ecx - - .p2align 4 -L(shl_0_loop): - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, 
%edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - jb L(shl_0_end) - - movdqa (%eax, %edi), %xmm0 - movdqa 16(%eax, %edi), %xmm1 - sub $32, %ecx - movdqa %xmm0, (%edx, %edi) - movdqa %xmm1, 16(%edx, %edi) - lea 32(%edi), %edi - -L(shl_0_end): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - add %edi, %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_0_gobble): -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - POP (%edi) - lea -128(%ecx), %ecx - jae L(shl_0_gobble_mem_loop) - - .p2align 4 -L(shl_0_gobble_cache_loop): - movdqa (%eax), %xmm0 - movdqa 0x10(%eax), %xmm1 - movdqa 0x20(%eax), %xmm2 - movdqa 0x30(%eax), %xmm3 - movdqa 0x40(%eax), %xmm4 - movdqa 0x50(%eax), %xmm5 - movdqa 0x60(%eax), %xmm6 - movdqa 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - movdqa %xmm2, 0x20(%edx) - movdqa %xmm3, 0x30(%edx) - movdqa %xmm4, 0x40(%edx) - movdqa %xmm5, 0x50(%edx) - movdqa %xmm6, 0x60(%edx) - movdqa %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_cache_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_cache_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), 
%xmm1 - add $0x40, %eax - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx - -L(shl_0_cache_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_cache_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx - -L(shl_0_cache_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_cache_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - movdqa %xmm0, (%edx) - add $0x10, %edx - -L(shl_0_cache_less_16bytes): - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - .p2align 4 -L(shl_0_gobble_mem_loop): - prefetcht0 0x1c0(%eax) - prefetcht0 0x280(%eax) - prefetcht0 0x1c0(%edx) - - movdqa (%eax), %xmm0 - movdqa 0x10(%eax), %xmm1 - movdqa 0x20(%eax), %xmm2 - movdqa 0x30(%eax), %xmm3 - movdqa 0x40(%eax), %xmm4 - movdqa 0x50(%eax), %xmm5 - movdqa 0x60(%eax), %xmm6 - movdqa 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - sub $0x80, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - movdqa %xmm2, 0x20(%edx) - movdqa %xmm3, 0x30(%edx) - movdqa %xmm4, 0x40(%edx) - movdqa %xmm5, 0x50(%edx) - movdqa %xmm6, 0x60(%edx) - movdqa %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - - jae L(shl_0_gobble_mem_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_mem_less_64bytes) - - movdqa (%eax), %xmm0 - sub $0x40, %ecx - movdqa 0x10(%eax), %xmm1 - - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - - movdqa 0x20(%eax), %xmm0 - movdqa 0x30(%eax), %xmm1 - add $0x40, %eax - - movdqa %xmm0, 0x20(%edx) - movdqa %xmm1, 0x30(%edx) - add $0x40, %edx - -L(shl_0_mem_less_64bytes): - cmp $0x20, %ecx - jb L(shl_0_mem_less_32bytes) - movdqa (%eax), %xmm0 - sub $0x20, %ecx - movdqa 0x10(%eax), %xmm1 - add $0x20, %eax - movdqa %xmm0, (%edx) - movdqa %xmm1, 0x10(%edx) - add $0x20, %edx - -L(shl_0_mem_less_32bytes): - cmp $0x10, %ecx - jb L(shl_0_mem_less_16bytes) - sub $0x10, %ecx - movdqa (%eax), %xmm0 - add $0x10, %eax - 
movdqa %xmm0, (%edx) - add $0x10, %edx - -L(shl_0_mem_less_16bytes): - add %ecx, %edx - add %ecx, %eax - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4) - - .p2align 4 -L(shl_1): -# ifndef USE_AS_MEMMOVE - movaps -1(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -1(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_1_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl1LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 15(%eax), %xmm2 - movaps 31(%eax), %xmm3 - movaps 47(%eax), %xmm4 - movaps 63(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $1, %xmm4, %xmm5 - palignr $1, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $1, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $1, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl1LoopStart) - -L(Shl1LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 15(%eax), %xmm2 - movaps 31(%eax), %xmm3 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_1_no_prefetch): - lea -32(%ecx), %ecx - lea -1(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_1_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_1_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), 
%xmm3 - movdqa %xmm3, %xmm1 - palignr $1, %xmm2, %xmm3 - palignr $1, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_1_no_prefetch_loop) - -L(sh_1_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 1(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_2): -# ifndef USE_AS_MEMMOVE - movaps -2(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -2(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_2_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl2LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 14(%eax), %xmm2 - movaps 30(%eax), %xmm3 - movaps 46(%eax), %xmm4 - movaps 62(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $2, %xmm4, %xmm5 - palignr $2, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $2, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $2, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl2LoopStart) - -L(Shl2LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 14(%eax), %xmm2 - movaps 30(%eax), %xmm3 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_2_no_prefetch): - lea -32(%ecx), %ecx - lea -2(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_2_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $2, 
%xmm2, %xmm3 - palignr $2, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_2_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $2, %xmm2, %xmm3 - palignr $2, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_2_no_prefetch_loop) - -L(sh_2_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 2(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_3): -# ifndef USE_AS_MEMMOVE - movaps -3(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -3(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_3_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl3LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 13(%eax), %xmm2 - movaps 29(%eax), %xmm3 - movaps 45(%eax), %xmm4 - movaps 61(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $3, %xmm4, %xmm5 - palignr $3, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $3, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $3, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl3LoopStart) - -L(Shl3LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 13(%eax), %xmm2 - movaps 29(%eax), %xmm3 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 
-L(sh_3_no_prefetch): - lea -32(%ecx), %ecx - lea -3(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_3_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_3_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $3, %xmm2, %xmm3 - palignr $3, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_3_no_prefetch_loop) - -L(sh_3_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 3(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_4): -# ifndef USE_AS_MEMMOVE - movaps -4(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -4(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_4_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl4LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 12(%eax), %xmm2 - movaps 28(%eax), %xmm3 - movaps 44(%eax), %xmm4 - movaps 60(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $4, %xmm4, %xmm5 - palignr $4, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $4, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $4, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl4LoopStart) - -L(Shl4LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 12(%eax), %xmm2 - movaps 28(%eax), %xmm3 - palignr $4, %xmm2, 
%xmm3 - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_4_no_prefetch): - lea -32(%ecx), %ecx - lea -4(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_4_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_4_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $4, %xmm2, %xmm3 - palignr $4, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_4_no_prefetch_loop) - -L(sh_4_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 4(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_5): -# ifndef USE_AS_MEMMOVE - movaps -5(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -5(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_5_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl5LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 11(%eax), %xmm2 - movaps 27(%eax), %xmm3 - movaps 43(%eax), %xmm4 - movaps 59(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $5, %xmm4, %xmm5 - palignr $5, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $5, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $5, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 
16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl5LoopStart) - -L(Shl5LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 11(%eax), %xmm2 - movaps 27(%eax), %xmm3 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_5_no_prefetch): - lea -32(%ecx), %ecx - lea -5(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_5_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_5_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $5, %xmm2, %xmm3 - palignr $5, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_5_no_prefetch_loop) - -L(sh_5_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 5(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_6): -# ifndef USE_AS_MEMMOVE - movaps -6(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -6(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_6_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl6LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 10(%eax), %xmm2 - movaps 26(%eax), %xmm3 - movaps 42(%eax), %xmm4 - 
movaps 58(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $6, %xmm4, %xmm5 - palignr $6, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $6, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $6, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl6LoopStart) - -L(Shl6LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 10(%eax), %xmm2 - movaps 26(%eax), %xmm3 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_6_no_prefetch): - lea -32(%ecx), %ecx - lea -6(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_6_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jb L(sh_6_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $6, %xmm2, %xmm3 - palignr $6, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - - jae L(sh_6_no_prefetch_loop) - -L(sh_6_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 6(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_7): -# ifndef USE_AS_MEMMOVE - movaps -7(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -7(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp 
__x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_7_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl7LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 9(%eax), %xmm2 - movaps 25(%eax), %xmm3 - movaps 41(%eax), %xmm4 - movaps 57(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $7, %xmm4, %xmm5 - palignr $7, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $7, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $7, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl7LoopStart) - -L(Shl7LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 9(%eax), %xmm2 - movaps 25(%eax), %xmm3 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_7_no_prefetch): - lea -32(%ecx), %ecx - lea -7(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_7_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_7_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $7, %xmm2, %xmm3 - palignr $7, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_7_no_prefetch_loop) - -L(sh_7_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 7(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_8): -# ifndef USE_AS_MEMMOVE - movaps -8(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -8(%eax), %xmm1 - movdqu 
%xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_8_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl8LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 8(%eax), %xmm2 - movaps 24(%eax), %xmm3 - movaps 40(%eax), %xmm4 - movaps 56(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $8, %xmm4, %xmm5 - palignr $8, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $8, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $8, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl8LoopStart) - -L(LoopLeave8): - add $32, %ecx - jle L(shl_end_0) - - movaps 8(%eax), %xmm2 - movaps 24(%eax), %xmm3 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_8_no_prefetch): - lea -32(%ecx), %ecx - lea -8(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_8_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_8_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $8, %xmm2, %xmm3 - palignr $8, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_8_no_prefetch_loop) - -L(sh_8_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 8(%edi, %eax), %eax - POP 
(%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_9): -# ifndef USE_AS_MEMMOVE - movaps -9(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -9(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_9_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl9LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 7(%eax), %xmm2 - movaps 23(%eax), %xmm3 - movaps 39(%eax), %xmm4 - movaps 55(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $9, %xmm4, %xmm5 - palignr $9, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $9, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $9, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl9LoopStart) - -L(Shl9LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 7(%eax), %xmm2 - movaps 23(%eax), %xmm3 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_9_no_prefetch): - lea -32(%ecx), %ecx - lea -9(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_9_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_9_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $9, %xmm2, %xmm3 - palignr $9, %xmm4, %xmm2 - lea 
32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_9_no_prefetch_loop) - -L(sh_9_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 9(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_10): -# ifndef USE_AS_MEMMOVE - movaps -10(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -10(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_10_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl10LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 6(%eax), %xmm2 - movaps 22(%eax), %xmm3 - movaps 38(%eax), %xmm4 - movaps 54(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $10, %xmm4, %xmm5 - palignr $10, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $10, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $10, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl10LoopStart) - -L(Shl10LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 6(%eax), %xmm2 - movaps 22(%eax), %xmm3 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_10_no_prefetch): - lea -32(%ecx), %ecx - lea -10(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_10_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa 
%xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_10_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $10, %xmm2, %xmm3 - palignr $10, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_10_no_prefetch_loop) - -L(sh_10_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 10(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_11): -# ifndef USE_AS_MEMMOVE - movaps -11(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -11(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_11_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl11LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 5(%eax), %xmm2 - movaps 21(%eax), %xmm3 - movaps 37(%eax), %xmm4 - movaps 53(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $11, %xmm4, %xmm5 - palignr $11, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $11, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $11, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl11LoopStart) - -L(Shl11LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 5(%eax), %xmm2 - movaps 21(%eax), %xmm3 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_11_no_prefetch): - lea -32(%ecx), %ecx - lea 
-11(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_11_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_11_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $11, %xmm2, %xmm3 - palignr $11, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_11_no_prefetch_loop) - -L(sh_11_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 11(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_12): -# ifndef USE_AS_MEMMOVE - movaps -12(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -12(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_12_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl12LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 4(%eax), %xmm2 - movaps 20(%eax), %xmm3 - movaps 36(%eax), %xmm4 - movaps 52(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $12, %xmm4, %xmm5 - palignr $12, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $12, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $12, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl12LoopStart) - -L(Shl12LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 4(%eax), %xmm2 - movaps 20(%eax), %xmm3 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm1, %xmm2 - - 
movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_12_no_prefetch): - lea -32(%ecx), %ecx - lea -12(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_12_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_12_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $12, %xmm2, %xmm3 - palignr $12, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_12_no_prefetch_loop) - -L(sh_12_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 12(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_13): -# ifndef USE_AS_MEMMOVE - movaps -13(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -13(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_13_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl13LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 3(%eax), %xmm2 - movaps 19(%eax), %xmm3 - movaps 35(%eax), %xmm4 - movaps 51(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $13, %xmm4, %xmm5 - palignr $13, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $13, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $13, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, 
%xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl13LoopStart) - -L(Shl13LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 3(%eax), %xmm2 - movaps 19(%eax), %xmm3 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_13_no_prefetch): - lea -32(%ecx), %ecx - lea -13(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_13_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_13_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $13, %xmm2, %xmm3 - palignr $13, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_13_no_prefetch_loop) - -L(sh_13_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 13(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_14): -# ifndef USE_AS_MEMMOVE - movaps -14(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -14(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp __x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_14_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl14LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 2(%eax), %xmm2 - movaps 18(%eax), %xmm3 - movaps 34(%eax), %xmm4 - movaps 
50(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $14, %xmm4, %xmm5 - palignr $14, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $14, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $14, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl14LoopStart) - -L(Shl14LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 2(%eax), %xmm2 - movaps 18(%eax), %xmm3 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_14_no_prefetch): - lea -32(%ecx), %ecx - lea -14(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_14_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_14_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $14, %xmm2, %xmm3 - palignr $14, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_14_no_prefetch_loop) - -L(sh_14_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 14(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_15): -# ifndef USE_AS_MEMMOVE - movaps -15(%eax), %xmm1 -# else - movl DEST+4(%esp), %edi - movaps -15(%eax), %xmm1 - movdqu %xmm0, (%edi) -# endif -# ifdef DATA_CACHE_SIZE_HALF - cmp $DATA_CACHE_SIZE_HALF, %ecx -# else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx -# else - cmp 
__x86_data_cache_size_half, %ecx -# endif -# endif - jb L(sh_15_no_prefetch) - - lea -64(%ecx), %ecx - - .p2align 4 -L(Shl15LoopStart): - prefetcht0 0x1c0(%eax) - prefetcht0 0x1c0(%edx) - movaps 1(%eax), %xmm2 - movaps 17(%eax), %xmm3 - movaps 33(%eax), %xmm4 - movaps 49(%eax), %xmm5 - movaps %xmm5, %xmm7 - palignr $15, %xmm4, %xmm5 - palignr $15, %xmm3, %xmm4 - movaps %xmm5, 48(%edx) - palignr $15, %xmm2, %xmm3 - lea 64(%eax), %eax - palignr $15, %xmm1, %xmm2 - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm7, %xmm1 - movaps %xmm2, (%edx) - lea 64(%edx), %edx - sub $64, %ecx - ja L(Shl15LoopStart) - -L(Shl15LoopLeave): - add $32, %ecx - jle L(shl_end_0) - - movaps 1(%eax), %xmm2 - movaps 17(%eax), %xmm3 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm1, %xmm2 - - movaps %xmm2, (%edx) - movaps %xmm3, 16(%edx) - lea 32(%edx, %ecx), %edx - lea 32(%eax, %ecx), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(sh_15_no_prefetch): - lea -32(%ecx), %ecx - lea -15(%eax), %eax - xor %edi, %edi - - .p2align 4 -L(sh_15_no_prefetch_loop): - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm4 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm1, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jb L(sh_15_end_no_prefetch_loop) - - movdqa 16(%eax, %edi), %xmm2 - sub $32, %ecx - movdqa 32(%eax, %edi), %xmm3 - movdqa %xmm3, %xmm1 - palignr $15, %xmm2, %xmm3 - palignr $15, %xmm4, %xmm2 - lea 32(%edi), %edi - movdqa %xmm2, -32(%edx, %edi) - movdqa %xmm3, -16(%edx, %edi) - jae L(sh_15_no_prefetch_loop) - -L(sh_15_end_no_prefetch_loop): - lea 32(%ecx), %ecx - add %ecx, %edi - add %edi, %edx - lea 15(%edi, %eax), %eax - POP (%edi) - BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(shl_end_0): - lea 32(%ecx), %ecx - lea (%edx, %ecx), %edx - lea (%eax, %ecx), %eax - POP (%edi) - 
BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4) - - .p2align 4 -L(fwd_write_44bytes): - movq -44(%eax), %xmm0 - movq %xmm0, -44(%edx) -L(fwd_write_36bytes): - movq -36(%eax), %xmm0 - movq %xmm0, -36(%edx) -L(fwd_write_28bytes): - movq -28(%eax), %xmm0 - movq %xmm0, -28(%edx) -L(fwd_write_20bytes): - movq -20(%eax), %xmm0 - movq %xmm0, -20(%edx) -L(fwd_write_12bytes): - movq -12(%eax), %xmm0 - movq %xmm0, -12(%edx) -L(fwd_write_4bytes): - movl -4(%eax), %ecx - movl %ecx, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_40bytes): - movq -40(%eax), %xmm0 - movq %xmm0, -40(%edx) -L(fwd_write_32bytes): - movq -32(%eax), %xmm0 - movq %xmm0, -32(%edx) -L(fwd_write_24bytes): - movq -24(%eax), %xmm0 - movq %xmm0, -24(%edx) -L(fwd_write_16bytes): - movq -16(%eax), %xmm0 - movq %xmm0, -16(%edx) -L(fwd_write_8bytes): - movq -8(%eax), %xmm0 - movq %xmm0, -8(%edx) -L(fwd_write_0bytes): -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_5bytes): - movl -5(%eax), %ecx - movl -4(%eax), %eax - movl %ecx, -5(%edx) - movl %eax, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_45bytes): - movq -45(%eax), %xmm0 - movq %xmm0, -45(%edx) -L(fwd_write_37bytes): - movq -37(%eax), %xmm0 - movq %xmm0, -37(%edx) -L(fwd_write_29bytes): - movq -29(%eax), %xmm0 - movq %xmm0, -29(%edx) -L(fwd_write_21bytes): - movq -21(%eax), %xmm0 - movq %xmm0, -21(%edx) -L(fwd_write_13bytes): - movq -13(%eax), %xmm0 - movq %xmm0, -13(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_41bytes): - 
movq -41(%eax), %xmm0 - movq %xmm0, -41(%edx) -L(fwd_write_33bytes): - movq -33(%eax), %xmm0 - movq %xmm0, -33(%edx) -L(fwd_write_25bytes): - movq -25(%eax), %xmm0 - movq %xmm0, -25(%edx) -L(fwd_write_17bytes): - movq -17(%eax), %xmm0 - movq %xmm0, -17(%edx) -L(fwd_write_9bytes): - movq -9(%eax), %xmm0 - movq %xmm0, -9(%edx) -L(fwd_write_1bytes): - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_46bytes): - movq -46(%eax), %xmm0 - movq %xmm0, -46(%edx) -L(fwd_write_38bytes): - movq -38(%eax), %xmm0 - movq %xmm0, -38(%edx) -L(fwd_write_30bytes): - movq -30(%eax), %xmm0 - movq %xmm0, -30(%edx) -L(fwd_write_22bytes): - movq -22(%eax), %xmm0 - movq %xmm0, -22(%edx) -L(fwd_write_14bytes): - movq -14(%eax), %xmm0 - movq %xmm0, -14(%edx) -L(fwd_write_6bytes): - movl -6(%eax), %ecx - movl %ecx, -6(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_42bytes): - movq -42(%eax), %xmm0 - movq %xmm0, -42(%edx) -L(fwd_write_34bytes): - movq -34(%eax), %xmm0 - movq %xmm0, -34(%edx) -L(fwd_write_26bytes): - movq -26(%eax), %xmm0 - movq %xmm0, -26(%edx) -L(fwd_write_18bytes): - movq -18(%eax), %xmm0 - movq %xmm0, -18(%edx) -L(fwd_write_10bytes): - movq -10(%eax), %xmm0 - movq %xmm0, -10(%edx) -L(fwd_write_2bytes): - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_47bytes): - movq -47(%eax), %xmm0 - movq %xmm0, -47(%edx) -L(fwd_write_39bytes): - movq -39(%eax), %xmm0 - movq %xmm0, -39(%edx) -L(fwd_write_31bytes): - movq -31(%eax), %xmm0 - movq %xmm0, -31(%edx) -L(fwd_write_23bytes): - movq -23(%eax), %xmm0 - movq %xmm0, -23(%edx) 
-L(fwd_write_15bytes): - movq -15(%eax), %xmm0 - movq %xmm0, -15(%edx) -L(fwd_write_7bytes): - movl -7(%eax), %ecx - movl %ecx, -7(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_43bytes): - movq -43(%eax), %xmm0 - movq %xmm0, -43(%edx) -L(fwd_write_35bytes): - movq -35(%eax), %xmm0 - movq %xmm0, -35(%edx) -L(fwd_write_27bytes): - movq -27(%eax), %xmm0 - movq %xmm0, -27(%edx) -L(fwd_write_19bytes): - movq -19(%eax), %xmm0 - movq %xmm0, -19(%edx) -L(fwd_write_11bytes): - movq -11(%eax), %xmm0 - movq %xmm0, -11(%edx) -L(fwd_write_3bytes): - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_40bytes_align): - movdqa -40(%eax), %xmm0 - movdqa %xmm0, -40(%edx) -L(fwd_write_24bytes_align): - movdqa -24(%eax), %xmm0 - movdqa %xmm0, -24(%edx) -L(fwd_write_8bytes_align): - movq -8(%eax), %xmm0 - movq %xmm0, -8(%edx) -L(fwd_write_0bytes_align): -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_32bytes_align): - movdqa -32(%eax), %xmm0 - movdqa %xmm0, -32(%edx) -L(fwd_write_16bytes_align): - movdqa -16(%eax), %xmm0 - movdqa %xmm0, -16(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_5bytes_align): - movl -5(%eax), %ecx - movl -4(%eax), %eax - movl %ecx, -5(%edx) - movl %eax, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_45bytes_align): - movdqa -45(%eax), %xmm0 - movdqa 
%xmm0, -45(%edx) -L(fwd_write_29bytes_align): - movdqa -29(%eax), %xmm0 - movdqa %xmm0, -29(%edx) -L(fwd_write_13bytes_align): - movq -13(%eax), %xmm0 - movq %xmm0, -13(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_37bytes_align): - movdqa -37(%eax), %xmm0 - movdqa %xmm0, -37(%edx) -L(fwd_write_21bytes_align): - movdqa -21(%eax), %xmm0 - movdqa %xmm0, -21(%edx) - movl -5(%eax), %ecx - movl %ecx, -5(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_41bytes_align): - movdqa -41(%eax), %xmm0 - movdqa %xmm0, -41(%edx) -L(fwd_write_25bytes_align): - movdqa -25(%eax), %xmm0 - movdqa %xmm0, -25(%edx) -L(fwd_write_9bytes_align): - movq -9(%eax), %xmm0 - movq %xmm0, -9(%edx) -L(fwd_write_1bytes_align): - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_33bytes_align): - movdqa -33(%eax), %xmm0 - movdqa %xmm0, -33(%edx) -L(fwd_write_17bytes_align): - movdqa -17(%eax), %xmm0 - movdqa %xmm0, -17(%edx) - movzbl -1(%eax), %ecx - movb %cl, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_46bytes_align): - movdqa -46(%eax), %xmm0 - movdqa %xmm0, -46(%edx) -L(fwd_write_30bytes_align): - movdqa -30(%eax), %xmm0 - movdqa %xmm0, -30(%edx) -L(fwd_write_14bytes_align): - movq -14(%eax), %xmm0 - movq %xmm0, -14(%edx) -L(fwd_write_6bytes_align): - movl -6(%eax), %ecx - movl %ecx, -6(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl 
%edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_38bytes_align): - movdqa -38(%eax), %xmm0 - movdqa %xmm0, -38(%edx) -L(fwd_write_22bytes_align): - movdqa -22(%eax), %xmm0 - movdqa %xmm0, -22(%edx) - movl -6(%eax), %ecx - movl %ecx, -6(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_42bytes_align): - movdqa -42(%eax), %xmm0 - movdqa %xmm0, -42(%edx) -L(fwd_write_26bytes_align): - movdqa -26(%eax), %xmm0 - movdqa %xmm0, -26(%edx) -L(fwd_write_10bytes_align): - movq -10(%eax), %xmm0 - movq %xmm0, -10(%edx) -L(fwd_write_2bytes_align): - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_34bytes_align): - movdqa -34(%eax), %xmm0 - movdqa %xmm0, -34(%edx) -L(fwd_write_18bytes_align): - movdqa -18(%eax), %xmm0 - movdqa %xmm0, -18(%edx) - movzwl -2(%eax), %ecx - movw %cx, -2(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_47bytes_align): - movdqa -47(%eax), %xmm0 - movdqa %xmm0, -47(%edx) -L(fwd_write_31bytes_align): - movdqa -31(%eax), %xmm0 - movdqa %xmm0, -31(%edx) -L(fwd_write_15bytes_align): - movq -15(%eax), %xmm0 - movq %xmm0, -15(%edx) -L(fwd_write_7bytes_align): - movl -7(%eax), %ecx - movl %ecx, -7(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_39bytes_align): - movdqa -39(%eax), %xmm0 - movdqa %xmm0, -39(%edx) -L(fwd_write_23bytes_align): - movdqa -23(%eax), %xmm0 - movdqa %xmm0, -23(%edx) - movl -7(%eax), %ecx - 
movl %ecx, -7(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_43bytes_align): - movdqa -43(%eax), %xmm0 - movdqa %xmm0, -43(%edx) -L(fwd_write_27bytes_align): - movdqa -27(%eax), %xmm0 - movdqa %xmm0, -27(%edx) -L(fwd_write_11bytes_align): - movq -11(%eax), %xmm0 - movq %xmm0, -11(%edx) -L(fwd_write_3bytes_align): - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_35bytes_align): - movdqa -35(%eax), %xmm0 - movdqa %xmm0, -35(%edx) -L(fwd_write_19bytes_align): - movdqa -19(%eax), %xmm0 - movdqa %xmm0, -19(%edx) - movzwl -3(%eax), %ecx - movzbl -1(%eax), %eax - movw %cx, -3(%edx) - movb %al, -1(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_44bytes_align): - movdqa -44(%eax), %xmm0 - movdqa %xmm0, -44(%edx) -L(fwd_write_28bytes_align): - movdqa -28(%eax), %xmm0 - movdqa %xmm0, -28(%edx) -L(fwd_write_12bytes_align): - movq -12(%eax), %xmm0 - movq %xmm0, -12(%edx) -L(fwd_write_4bytes_align): - movl -4(%eax), %ecx - movl %ecx, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN - - .p2align 4 -L(fwd_write_36bytes_align): - movdqa -36(%eax), %xmm0 - movdqa %xmm0, -36(%edx) -L(fwd_write_20bytes_align): - movdqa -20(%eax), %xmm0 - movdqa %xmm0, -20(%edx) - movl -4(%eax), %ecx - movl %ecx, -4(%edx) -# ifndef USE_AS_BCOPY -# ifdef USE_AS_MEMPCPY - movl %edx, %eax -# else - movl DEST(%esp), %eax -# endif -# endif - RETURN_END - - CFI_PUSH (%edi) - - .p2align 4 -L(large_page): - movdqu (%eax), %xmm1 -# 
ifdef USE_AS_MEMMOVE - movl DEST+4(%esp), %edi - movdqu %xmm0, (%edi) -# endif - lea 16(%eax), %eax - movntdq %xmm1, (%edx) - lea 16(%edx), %edx - lea -0x90(%ecx), %ecx - POP (%edi) - - .p2align 4 -L(large_page_loop): - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - movdqu 0x40(%eax), %xmm4 - movdqu 0x50(%eax), %xmm5 - movdqu 0x60(%eax), %xmm6 - movdqu 0x70(%eax), %xmm7 - lea 0x80(%eax), %eax - - sub $0x80, %ecx - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - movntdq %xmm4, 0x40(%edx) - movntdq %xmm5, 0x50(%edx) - movntdq %xmm6, 0x60(%edx) - movntdq %xmm7, 0x70(%edx) - lea 0x80(%edx), %edx - jae L(large_page_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(large_page_less_64bytes) - - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - movdqu 0x20(%eax), %xmm2 - movdqu 0x30(%eax), %xmm3 - lea 0x40(%eax), %eax - - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - movntdq %xmm2, 0x20(%edx) - movntdq %xmm3, 0x30(%edx) - lea 0x40(%edx), %edx - sub $0x40, %ecx -L(large_page_less_64bytes): - cmp $32, %ecx - jb L(large_page_less_32bytes) - movdqu (%eax), %xmm0 - movdqu 0x10(%eax), %xmm1 - lea 0x20(%eax), %eax - movntdq %xmm0, (%edx) - movntdq %xmm1, 0x10(%edx) - lea 0x20(%edx), %edx - sub $0x20, %ecx -L(large_page_less_32bytes): - add %ecx, %edx - add %ecx, %eax - sfence - BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - - .p2align 4 -L(bk_write_44bytes): - movq 36(%eax), %xmm0 - movq %xmm0, 36(%edx) -L(bk_write_36bytes): - movq 28(%eax), %xmm0 - movq %xmm0, 28(%edx) -L(bk_write_28bytes): - movq 20(%eax), %xmm0 - movq %xmm0, 20(%edx) -L(bk_write_20bytes): - movq 12(%eax), %xmm0 - movq %xmm0, 12(%edx) -L(bk_write_12bytes): - movq 4(%eax), %xmm0 - movq %xmm0, 4(%edx) -L(bk_write_4bytes): - movl (%eax), %ecx - movl %ecx, (%edx) -L(bk_write_0bytes): -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - 
add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_40bytes): - movq 32(%eax), %xmm0 - movq %xmm0, 32(%edx) -L(bk_write_32bytes): - movq 24(%eax), %xmm0 - movq %xmm0, 24(%edx) -L(bk_write_24bytes): - movq 16(%eax), %xmm0 - movq %xmm0, 16(%edx) -L(bk_write_16bytes): - movq 8(%eax), %xmm0 - movq %xmm0, 8(%edx) -L(bk_write_8bytes): - movq (%eax), %xmm0 - movq %xmm0, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_45bytes): - movq 37(%eax), %xmm0 - movq %xmm0, 37(%edx) -L(bk_write_37bytes): - movq 29(%eax), %xmm0 - movq %xmm0, 29(%edx) -L(bk_write_29bytes): - movq 21(%eax), %xmm0 - movq %xmm0, 21(%edx) -L(bk_write_21bytes): - movq 13(%eax), %xmm0 - movq %xmm0, 13(%edx) -L(bk_write_13bytes): - movq 5(%eax), %xmm0 - movq %xmm0, 5(%edx) -L(bk_write_5bytes): - movl 1(%eax), %ecx - movl %ecx, 1(%edx) -L(bk_write_1bytes): - movzbl (%eax), %ecx - movb %cl, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_41bytes): - movq 33(%eax), %xmm0 - movq %xmm0, 33(%edx) -L(bk_write_33bytes): - movq 25(%eax), %xmm0 - movq %xmm0, 25(%edx) -L(bk_write_25bytes): - movq 17(%eax), %xmm0 - movq %xmm0, 17(%edx) -L(bk_write_17bytes): - movq 9(%eax), %xmm0 - movq %xmm0, 9(%edx) -L(bk_write_9bytes): - movq 1(%eax), %xmm0 - movq %xmm0, 1(%edx) - movzbl (%eax), %ecx - movb %cl, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_46bytes): - movq 38(%eax), %xmm0 - movq %xmm0, 38(%edx) -L(bk_write_38bytes): - movq 30(%eax), %xmm0 - movq %xmm0, 30(%edx) -L(bk_write_30bytes): - movq 22(%eax), %xmm0 - movq %xmm0, 22(%edx) -L(bk_write_22bytes): - movq 14(%eax), %xmm0 - movq %xmm0, 14(%edx) -L(bk_write_14bytes): - movq 6(%eax), 
%xmm0 - movq %xmm0, 6(%edx) -L(bk_write_6bytes): - movl 2(%eax), %ecx - movl %ecx, 2(%edx) - movzwl (%eax), %ecx - movw %cx, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_42bytes): - movq 34(%eax), %xmm0 - movq %xmm0, 34(%edx) -L(bk_write_34bytes): - movq 26(%eax), %xmm0 - movq %xmm0, 26(%edx) -L(bk_write_26bytes): - movq 18(%eax), %xmm0 - movq %xmm0, 18(%edx) -L(bk_write_18bytes): - movq 10(%eax), %xmm0 - movq %xmm0, 10(%edx) -L(bk_write_10bytes): - movq 2(%eax), %xmm0 - movq %xmm0, 2(%edx) -L(bk_write_2bytes): - movzwl (%eax), %ecx - movw %cx, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_47bytes): - movq 39(%eax), %xmm0 - movq %xmm0, 39(%edx) -L(bk_write_39bytes): - movq 31(%eax), %xmm0 - movq %xmm0, 31(%edx) -L(bk_write_31bytes): - movq 23(%eax), %xmm0 - movq %xmm0, 23(%edx) -L(bk_write_23bytes): - movq 15(%eax), %xmm0 - movq %xmm0, 15(%edx) -L(bk_write_15bytes): - movq 7(%eax), %xmm0 - movq %xmm0, 7(%edx) -L(bk_write_7bytes): - movl 3(%eax), %ecx - movl %ecx, 3(%edx) - movzwl 1(%eax), %ecx - movw %cx, 1(%edx) - movzbl (%eax), %eax - movb %al, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN - - .p2align 4 -L(bk_write_43bytes): - movq 35(%eax), %xmm0 - movq %xmm0, 35(%edx) -L(bk_write_35bytes): - movq 27(%eax), %xmm0 - movq %xmm0, 27(%edx) -L(bk_write_27bytes): - movq 19(%eax), %xmm0 - movq %xmm0, 19(%edx) -L(bk_write_19bytes): - movq 11(%eax), %xmm0 - movq %xmm0, 11(%edx) -L(bk_write_11bytes): - movq 3(%eax), %xmm0 - movq %xmm0, 3(%edx) -L(bk_write_3bytes): - movzwl 1(%eax), %ecx - movw %cx, 1(%edx) - movzbl (%eax), %eax - movb %al, (%edx) -# ifndef USE_AS_BCOPY - movl DEST(%esp), %eax -# ifdef USE_AS_MEMPCPY - movl 
LEN(%esp), %ecx - add %ecx, %eax -# endif -# endif - RETURN_END - - - .pushsection .rodata.ssse3,"a",@progbits - .p2align 2 -L(table_48bytes_fwd): - .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd)) - .int JMPTBL 
(L(fwd_write_31bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd)) - .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd)) - - .p2align 2 -L(table_48bytes_fwd_align): - .int JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_13bytes_align), 
L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_41bytes_align), 
L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align)) - .int JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align)) - - .p2align 2 -L(shl_table): - .int JMPTBL (L(shl_0), L(shl_table)) - .int JMPTBL (L(shl_1), L(shl_table)) - .int JMPTBL (L(shl_2), L(shl_table)) - .int JMPTBL (L(shl_3), L(shl_table)) - .int JMPTBL (L(shl_4), L(shl_table)) - .int JMPTBL (L(shl_5), L(shl_table)) - .int JMPTBL (L(shl_6), L(shl_table)) - .int JMPTBL (L(shl_7), L(shl_table)) - .int JMPTBL (L(shl_8), L(shl_table)) - .int JMPTBL (L(shl_9), L(shl_table)) - .int JMPTBL (L(shl_10), L(shl_table)) - .int JMPTBL (L(shl_11), L(shl_table)) - .int JMPTBL (L(shl_12), L(shl_table)) - .int JMPTBL (L(shl_13), L(shl_table)) - .int JMPTBL (L(shl_14), L(shl_table)) - .int JMPTBL (L(shl_15), L(shl_table)) - - .p2align 2 -L(table_48_bytes_bwd): - .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd)) - .int JMPTBL 
(L(bk_write_14bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd)) - .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd)) - - 
.popsection - -# ifdef USE_AS_MEMMOVE - .p2align 4 -L(copy_backward): - PUSH (%edi) - movl %eax, %edi - lea (%ecx,%edx,1),%edx - lea (%ecx,%edi,1),%edi - testl $0x3, %edx - jnz L(bk_align) - -L(bk_aligned_4): - cmp $64, %ecx - jae L(bk_write_more64bytes) - -L(bk_write_64bytesless): - cmp $32, %ecx - jb L(bk_write_less32bytes) - -L(bk_write_more32bytes): - /* Copy 32 bytes at a time. */ - sub $32, %ecx - movq -8(%edi), %xmm0 - movq %xmm0, -8(%edx) - movq -16(%edi), %xmm0 - movq %xmm0, -16(%edx) - movq -24(%edi), %xmm0 - movq %xmm0, -24(%edx) - movq -32(%edi), %xmm0 - movq %xmm0, -32(%edx) - sub $32, %edx - sub $32, %edi - -L(bk_write_less32bytes): - movl %edi, %eax - sub %ecx, %edx - sub %ecx, %eax - POP (%edi) -L(bk_write_less32bytes_2): - BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) - - CFI_PUSH (%edi) - - .p2align 4 -L(bk_align): - cmp $8, %ecx - jbe L(bk_write_less32bytes) - testl $1, %edx - /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0, - then (EDX & 2) must be != 0. */ - jz L(bk_got2) - sub $1, %edi - sub $1, %ecx - sub $1, %edx - movzbl (%edi), %eax - movb %al, (%edx) - - testl $2, %edx - jz L(bk_aligned_4) - -L(bk_got2): - sub $2, %edi - sub $2, %ecx - sub $2, %edx - movzwl (%edi), %eax - movw %ax, (%edx) - jmp L(bk_aligned_4) - - .p2align 4 -L(bk_write_more64bytes): - /* Check alignment of last byte. */ - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - -/* EDX is aligned 4 bytes, but not 16 bytes. 
*/ -L(bk_ssse3_align): - sub $4, %edi - sub $4, %ecx - sub $4, %edx - movl (%edi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %edi - sub $4, %ecx - sub $4, %edx - movl (%edi), %eax - movl %eax, (%edx) - - testl $15, %edx - jz L(bk_ssse3_cpy_pre) - - sub $4, %edi - sub $4, %ecx - sub $4, %edx - movl (%edi), %eax - movl %eax, (%edx) - -L(bk_ssse3_cpy_pre): - cmp $64, %ecx - jb L(bk_write_more32bytes) - - .p2align 4 -L(bk_ssse3_cpy): - sub $64, %edi - sub $64, %ecx - sub $64, %edx - movdqu 0x30(%edi), %xmm3 - movdqa %xmm3, 0x30(%edx) - movdqu 0x20(%edi), %xmm2 - movdqa %xmm2, 0x20(%edx) - movdqu 0x10(%edi), %xmm1 - movdqa %xmm1, 0x10(%edx) - movdqu (%edi), %xmm0 - movdqa %xmm0, (%edx) - cmp $64, %ecx - jae L(bk_ssse3_cpy) - jmp L(bk_write_64bytesless) - -# endif - -END (MEMCPY) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S deleted file mode 100644 index f725944620..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy.S +++ /dev/null @@ -1,78 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need memcpy before the initialization - happened. */ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(memcpy) - .type memcpy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memcpy_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_ssse3_rep) -2: ret -END(memcpy) - -# undef ENTRY -# define ENTRY(name) \ - .type __memcpy_ia32, @function; \ - .p2align 4; \ - .globl __memcpy_ia32; \ - .hidden __memcpy_ia32; \ - __memcpy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memcpy_chk_ia32, @function; \ - .globl __memcpy_chk_ia32; \ - .p2align 4; \ - __memcpy_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32 - -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memcpy; __GI_memcpy = __memcpy_ia32 -#endif - -#include "../memcpy.S" diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S deleted file mode 100644 index 1b4fbe2e6f..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy_chk.S +++ /dev/null @@ -1,50 +0,0 @@ -/* Multiple versions of __memcpy_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. There are no multiarch memcpy functions for static binaries. - */ -#if IS_IN (libc) -# ifdef SHARED - .text -ENTRY(__memcpy_chk) - .type __memcpy_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_chk_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3_rep) -2: ret -END(__memcpy_chk) -# else -# include "../memcpy_chk.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S deleted file mode 100644 index 3873594cb2..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define MEMCPY __memmove_sse2_unaligned -#define MEMCPY_CHK __memmove_chk_sse2_unaligned -#include "memcpy-sse2-unaligned.S" diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S deleted 
file mode 100644 index d202fc4a13..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define MEMCPY __memmove_ssse3_rep -#define MEMCPY_CHK __memmove_chk_ssse3_rep -#include "memcpy-ssse3-rep.S" diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3.S b/sysdeps/i386/i686/multiarch/memmove-ssse3.S deleted file mode 100644 index 295430b1ef..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMMOVE -#define MEMCPY __memmove_ssse3 -#define MEMCPY_CHK __memmove_chk_ssse3 -#include "memcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S deleted file mode 100644 index 6eb418ca7f..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove.S +++ /dev/null @@ -1,89 +0,0 @@ -/* Multiple versions of memmove - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. 
*/ -#if IS_IN (libc) - .text -ENTRY(memmove) - .type memmove, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memmove_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_ssse3_rep) -2: ret -END(memmove) - -# ifdef SHARED -# undef ENTRY -# define ENTRY(name) \ - .type __memmove_ia32, @function; \ - .p2align 4; \ - .globl __memmove_ia32; \ - .hidden __memmove_ia32; \ - __memmove_ia32: cfi_startproc; \ - CALL_MCOUNT -# else -# undef ENTRY -# define ENTRY(name) \ - .type __memmove_ia32, @function; \ - .globl __memmove_ia32; \ - .p2align 4; \ - __memmove_ia32: cfi_startproc; \ - CALL_MCOUNT -# endif - -# undef END -# define END(name) \ - cfi_endproc; .size __memmove_ia32, .-__memmove_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memmove_chk_ia32, @function; \ - .globl __memmove_chk_ia32; \ - .p2align 4; \ - __memmove_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memmove; __GI_memmove = __memmove_ia32 -# endif -#endif - -#include "../memmove.S" diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S deleted file mode 100644 index 314834c4c6..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove_chk.S +++ /dev/null @@ -1,94 +0,0 @@ -/* Multiple versions of __memmove_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. 
- Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(__memmove_chk) - .type __memmove_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memmove_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_chk_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3_rep) -2: ret -END(__memmove_chk) - -# ifndef SHARED - .type __memmove_chk_sse2_unaligned, @function - .p2align 4; -__memmove_chk_sse2_unaligned: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_sse2_unaligned - cfi_endproc - .size __memmove_chk_sse2_unaligned, .-__memmove_chk_sse2_unaligned - - .type __memmove_chk_ssse3, @function - .p2align 4; -__memmove_chk_ssse3: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ssse3 - cfi_endproc - .size __memmove_chk_ssse3, .-__memmove_chk_ssse3 - - .type __memmove_chk_ssse3_rep, @function - .p2align 4; 
-__memmove_chk_ssse3_rep: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ssse3_rep - cfi_endproc - .size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep - - .type __memmove_chk_ia32, @function - .p2align 4; -__memmove_chk_ia32: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ia32 - cfi_endproc - .size __memmove_chk_ia32, .-__memmove_chk_ia32 -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S deleted file mode 100644 index a1cea50771..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_sse2_unaligned -#define MEMCPY_CHK __mempcpy_chk_sse2_unaligned -#include "memcpy-sse2-unaligned.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S deleted file mode 100644 index 5357b33e18..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_ssse3_rep -#define MEMCPY_CHK __mempcpy_chk_ssse3_rep -#include "memcpy-ssse3-rep.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S deleted file mode 100644 index 822d98e954..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_MEMPCPY -#define MEMCPY __mempcpy_ssse3 -#define MEMCPY_CHK __mempcpy_chk_ssse3 -#include "memcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S deleted file mode 100644 index 06e377fbc9..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy.S +++ /dev/null @@ -1,81 +0,0 @@ -/* Multiple versions of mempcpy - All versions must be listed in ifunc-impl-list.c. 
- Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need mempcpy before the initialization - happened. 
*/ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(__mempcpy) - .type __mempcpy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__mempcpy_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_ssse3_rep) -2: ret -END(__mempcpy) - -# undef ENTRY -# define ENTRY(name) \ - .type __mempcpy_ia32, @function; \ - .p2align 4; \ - .globl __mempcpy_ia32; \ - .hidden __mempcpy_ia32; \ - __mempcpy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __mempcpy_chk_ia32, @function; \ - .globl __mempcpy_chk_ia32; \ - .p2align 4; \ - __mempcpy_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32 - -# undef libc_hidden_def -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_def(name) \ - .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32 -# define libc_hidden_builtin_def(name) \ - .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32 -#endif - -#include "../mempcpy.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S deleted file mode 100644 index e13e5248a5..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S +++ /dev/null @@ -1,50 +0,0 @@ -/* Multiple versions of __mempcpy_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. There are no multiarch mempcpy functions for static binaries. - */ -#if IS_IN (libc) -# ifdef SHARED - .text -ENTRY(__mempcpy_chk) - .type __mempcpy_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_chk_sse2_unaligned) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3_rep) -2: ret -END(__mempcpy_chk) -# else -# include "../mempcpy_chk.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c deleted file mode 100644 index ef7bbbe792..0000000000 --- a/sysdeps/i386/i686/multiarch/memrchr-c.c +++ /dev/null @@ -1,7 +0,0 @@ -#if IS_IN (libc) -# define MEMRCHR __memrchr_ia32 -# include <string.h> -extern void *__memrchr_ia32 (const void *, int, size_t); -#endif - -#include "string/memrchr.c" diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S deleted file mode 100644 index dbbe94fd08..0000000000 --- 
a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S +++ /dev/null @@ -1,417 +0,0 @@ -/* Optimized memrchr with sse2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - -# define MEMCHR __memrchr_sse2_bsf - - .text -ENTRY (MEMCHR) - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - mov LEN(%esp), %edx - - sub $16, %edx - jbe L(length_less16) - - punpcklbw %xmm1, %xmm1 - add %edx, %ecx - punpcklbw %xmm1, %xmm1 - - movdqu (%ecx), %xmm0 - pshufd $0, %xmm1, %xmm1 - pcmpeqb %xmm1, %xmm0 - -/* Check if there is a match. */ - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches0) - - sub $64, %ecx - mov %ecx, %eax - and $15, %eax - jz L(loop_prolog) - - add $16, %ecx - add $16, %edx - sub %eax, %ecx - sub %eax, %edx - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop_prolog): - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm4 - pcmpeqb %xmm1, %xmm4 - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches0) - - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches0) - - mov %ecx, %eax - and $63, %eax - test %eax, %eax - jz L(align64_loop) - - add $64, %ecx - add $64, %edx - sub %eax, %ecx - sub %eax, %edx - - .p2align 4 -L(align64_loop): - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa (%ecx), %xmm0 - movdqa 16(%ecx), %xmm2 - movdqa 32(%ecx), %xmm3 - movdqa 48(%ecx), %xmm4 - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm3, %xmm0 - pmaxub %xmm4, %xmm2 - pmaxub %xmm0, %xmm2 - pmovmskb %xmm2, %eax - - test %eax, %eax - jz L(align64_loop) - - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches48) - - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm2 - - pcmpeqb %xmm1, %xmm2 - pcmpeqb (%ecx), %xmm1 - - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - - pmovmskb %xmm1, %eax - bsr %eax, %eax - - add %ecx, %eax - ret - - .p2align 4 -L(exit_loop): - add $64, %edx - cmp $32, %edx - jbe L(exit_loop_32) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - 
pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16_1) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb (%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches0_1) - xor %eax, %eax - ret - - .p2align 4 -L(exit_loop_32): - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48_1) - cmp $16, %edx - jbe L(return_null) - - pcmpeqb 32(%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches32_1) - xor %eax, %eax - ret - - .p2align 4 -L(matches0): - bsr %eax, %eax - add %ecx, %eax - ret - - .p2align 4 -L(matches16): - bsr %eax, %eax - lea 16(%eax, %ecx), %eax - ret - - .p2align 4 -L(matches32): - bsr %eax, %eax - lea 32(%eax, %ecx), %eax - ret - - .p2align 4 -L(matches48): - bsr %eax, %eax - lea 48(%eax, %ecx), %eax - ret - - .p2align 4 -L(matches0_1): - bsr %eax, %eax - sub $64, %edx - add %eax, %edx - jl L(return_null) - add %ecx, %eax - ret - - .p2align 4 -L(matches16_1): - bsr %eax, %eax - sub $48, %edx - add %eax, %edx - jl L(return_null) - lea 16(%ecx, %eax), %eax - ret - - .p2align 4 -L(matches32_1): - bsr %eax, %eax - sub $32, %edx - add %eax, %edx - jl L(return_null) - lea 32(%ecx, %eax), %eax - ret - - .p2align 4 -L(matches48_1): - bsr %eax, %eax - sub $16, %edx - add %eax, %edx - jl L(return_null) - lea 48(%ecx, %eax), %eax - ret - - .p2align 4 -L(return_null): - xor %eax, %eax - ret - - .p2align 4 -L(length_less16_offset0): - mov %dl, %cl - pcmpeqb (%eax), %xmm1 - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - mov %edx, %ecx - - pmovmskb %xmm1, %edx - - and %ecx, %edx - test %edx, %edx - jz L(return_null) - - bsr %edx, %ecx - add %ecx, %eax - ret - - .p2align 4 -L(length_less16): - punpcklbw %xmm1, %xmm1 - mov %ecx, %eax - punpcklbw %xmm1, %xmm1 - add $16, %edx 
- jz L(return_null) - - pshufd $0, %xmm1, %xmm1 - and $15, %ecx - jz L(length_less16_offset0) - - PUSH (%edi) - mov %cl, %dh - add %dl, %dh - and $-16, %eax - - sub $16, %dh - ja L(length_less16_part2) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - sar %cl, %edi - add %ecx, %eax - mov %dl, %cl - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2): - movdqa 16(%eax), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %edi - - mov %cl, %ch - - mov %dh, %cl - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - - test %edi, %edi - jnz L(length_less16_part2_return) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - mov %ch, %cl - sar %cl, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - xor %ch, %ch - add %ecx, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2_return): - bsr %edi, %edi - lea 16(%eax, %edi), %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(ret_null): - xor %eax, %eax - POP (%edi) - ret - -END (MEMCHR) -#endif diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S deleted file mode 100644 index 5f7853f683..0000000000 --- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S +++ /dev/null @@ -1,724 +0,0 @@ -/* Optimized memrchr with sse2 without bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - - atom_text_section -ENTRY (__memrchr_sse2) - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - mov LEN(%esp), %edx - - sub $16, %edx - jbe L(length_less16) - - punpcklbw %xmm1, %xmm1 - add %edx, %ecx - punpcklbw %xmm1, %xmm1 - - movdqu (%ecx), %xmm0 - pshufd $0, %xmm1, %xmm1 - pcmpeqb %xmm1, %xmm0 - - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(exit_dispatch) - - sub $64, %ecx - mov %ecx, %eax - and $15, %eax - jz L(loop_prolog) - - lea 16(%ecx), %ecx - lea 16(%edx), %edx - sub %eax, %edx - and $-16, %ecx - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop_prolog): - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm4 - pcmpeqb %xmm1, %xmm4 - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(exit_dispatch) - - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16) - - movdqa (%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(exit_dispatch) - - mov %ecx, %eax - and $63, %eax - test %eax, %eax - jz L(align64_loop) - - lea 64(%ecx), %ecx - lea 64(%edx), %edx - and $-64, %ecx - sub %eax, %edx - - .p2align 4 -L(align64_loop): - sub $64, %ecx - sub $64, %edx - jbe L(exit_loop) - - movdqa (%ecx), %xmm0 - movdqa 16(%ecx), %xmm2 - movdqa 32(%ecx), %xmm3 - movdqa 48(%ecx), %xmm4 - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm1, %xmm2 - pcmpeqb %xmm1, %xmm3 - pcmpeqb %xmm1, %xmm4 - - pmaxub %xmm3, %xmm0 - pmaxub %xmm4, %xmm2 - pmaxub %xmm0, %xmm2 - pmovmskb %xmm2, %eax - - test %eax, %eax - jz L(align64_loop) - - pmovmskb %xmm4, %eax - test %eax, %eax - jnz L(matches48) - - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm2 - - pcmpeqb %xmm1, %xmm2 - pcmpeqb (%ecx), %xmm1 - - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches16) - - pmovmskb %xmm1, %eax - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - 
jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(exit_loop): - add $64, %edx - cmp $32, %edx - jbe L(exit_loop_32) - - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48) - - movdqa 32(%ecx), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %eax - test %eax, %eax - jnz L(matches32) - - movdqa 16(%ecx), %xmm3 - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(matches16_1) - cmp $48, %edx - jbe L(return_null) - - pcmpeqb (%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches0_1) - xor %eax, %eax - ret - - .p2align 4 -L(exit_loop_32): - movdqa 48(%ecx), %xmm0 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches48_1) - cmp $16, %edx - jbe L(return_null) - - pcmpeqb 32(%ecx), %xmm1 - pmovmskb %xmm1, %eax - test %eax, %eax - jnz L(matches32_1) - xor %eax, %eax - ret - - .p2align 4 -L(matches16): - lea 16(%ecx), %ecx - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(matches32): - lea 32(%ecx), %ecx - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(matches48): - lea 48(%ecx), %ecx - - .p2align 4 -L(exit_dispatch): - test %ah, %ah - jnz L(exit_dispatch_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(exit_dispatch_8) - test $0x08, %al - jnz L(exit_4) - test $0x04, %al - jnz L(exit_3) - test $0x02, %al - jnz L(exit_2) - mov %ecx, %eax - ret - - .p2align 4 -L(exit_dispatch_8): - test $0x80, %al - jnz L(exit_8) - test $0x40, %al - jnz L(exit_7) - test $0x20, %al - jnz L(exit_6) - lea 4(%ecx), %eax - ret - - .p2align 4 
-L(exit_dispatch_high): - mov %ah, %dh - and $15 << 4, %dh - jnz L(exit_dispatch_high_8) - test $0x08, %ah - jnz L(exit_12) - test $0x04, %ah - jnz L(exit_11) - test $0x02, %ah - jnz L(exit_10) - lea 8(%ecx), %eax - ret - - .p2align 4 -L(exit_dispatch_high_8): - test $0x80, %ah - jnz L(exit_16) - test $0x40, %ah - jnz L(exit_15) - test $0x20, %ah - jnz L(exit_14) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(exit_2): - lea 1(%ecx), %eax - ret - - .p2align 4 -L(exit_3): - lea 2(%ecx), %eax - ret - - .p2align 4 -L(exit_4): - lea 3(%ecx), %eax - ret - - .p2align 4 -L(exit_6): - lea 5(%ecx), %eax - ret - - .p2align 4 -L(exit_7): - lea 6(%ecx), %eax - ret - - .p2align 4 -L(exit_8): - lea 7(%ecx), %eax - ret - - .p2align 4 -L(exit_10): - lea 9(%ecx), %eax - ret - - .p2align 4 -L(exit_11): - lea 10(%ecx), %eax - ret - - .p2align 4 -L(exit_12): - lea 11(%ecx), %eax - ret - - .p2align 4 -L(exit_14): - lea 13(%ecx), %eax - ret - - .p2align 4 -L(exit_15): - lea 14(%ecx), %eax - ret - - .p2align 4 -L(exit_16): - lea 15(%ecx), %eax - ret - - .p2align 4 -L(matches0_1): - lea -64(%edx), %edx - - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(matches16_1): - lea -48(%edx), %edx - lea 16(%ecx), %ecx - - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(matches32_1): - lea -32(%edx), %edx - lea 32(%ecx), %ecx - - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - 
jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(matches48_1): - lea -16(%edx), %edx - lea 48(%ecx), %ecx - - .p2align 4 -L(exit_dispatch_1): - test %ah, %ah - jnz L(exit_dispatch_1_high) - mov %al, %ah - and $15 << 4, %ah - jnz L(exit_dispatch_1_8) - test $0x08, %al - jnz L(exit_1_4) - test $0x04, %al - jnz L(exit_1_3) - test $0x02, %al - jnz L(exit_1_2) - add $0, %edx - jl L(return_null) - mov %ecx, %eax - ret - - .p2align 4 -L(exit_dispatch_1_8): - test $0x80, %al - jnz L(exit_1_8) - test $0x40, %al - jnz L(exit_1_7) - test $0x20, %al - jnz L(exit_1_6) - add $4, %edx - jl L(return_null) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(exit_dispatch_1_high): - mov %ah, %al - and $15 << 4, %al - jnz L(exit_dispatch_1_high_8) - test $0x08, %ah - jnz L(exit_1_12) - test $0x04, %ah - jnz L(exit_1_11) - test $0x02, %ah - jnz L(exit_1_10) - add $8, %edx - jl L(return_null) - lea 8(%ecx), %eax - ret - - .p2align 4 -L(exit_dispatch_1_high_8): - test $0x80, %ah - jnz L(exit_1_16) - test $0x40, %ah - jnz L(exit_1_15) - test $0x20, %ah - jnz L(exit_1_14) - add $12, %edx - jl L(return_null) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(exit_1_2): - add $1, %edx - jl L(return_null) - lea 1(%ecx), %eax - ret - - .p2align 4 -L(exit_1_3): - add $2, %edx - jl L(return_null) - lea 2(%ecx), %eax - ret - - .p2align 4 -L(exit_1_4): - add $3, %edx - jl L(return_null) - lea 3(%ecx), %eax - ret - - .p2align 4 -L(exit_1_6): - add $5, %edx - jl L(return_null) - lea 5(%ecx), %eax - ret - - .p2align 4 -L(exit_1_7): - add $6, %edx - jl L(return_null) - lea 6(%ecx), %eax - ret - - .p2align 4 -L(exit_1_8): - add $7, %edx - jl L(return_null) - lea 7(%ecx), %eax - ret - - .p2align 4 -L(exit_1_10): - add $9, %edx - jl L(return_null) - lea 9(%ecx), %eax - ret - - .p2align 4 -L(exit_1_11): - add $10, %edx - jl L(return_null) - lea 10(%ecx), %eax - ret - - .p2align 4 -L(exit_1_12): - add $11, %edx - jl L(return_null) - lea 11(%ecx), %eax - ret - - .p2align 4 
-L(exit_1_14): - add $13, %edx - jl L(return_null) - lea 13(%ecx), %eax - ret - - .p2align 4 -L(exit_1_15): - add $14, %edx - jl L(return_null) - lea 14(%ecx), %eax - ret - - .p2align 4 -L(exit_1_16): - add $15, %edx - jl L(return_null) - lea 15(%ecx), %eax - ret - - .p2align 4 -L(return_null): - xor %eax, %eax - ret - - .p2align 4 -L(length_less16_offset0): - mov %dl, %cl - pcmpeqb (%eax), %xmm1 - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - mov %eax, %ecx - pmovmskb %xmm1, %eax - - and %edx, %eax - test %eax, %eax - jnz L(exit_dispatch) - - xor %eax, %eax - ret - - .p2align 4 -L(length_less16): - punpcklbw %xmm1, %xmm1 - add $16, %edx - je L(return_null) - punpcklbw %xmm1, %xmm1 - - mov %ecx, %eax - pshufd $0, %xmm1, %xmm1 - - and $15, %ecx - jz L(length_less16_offset0) - - PUSH (%edi) - - mov %cl, %dh - add %dl, %dh - and $-16, %eax - - sub $16, %dh - ja L(length_less16_part2) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - sar %cl, %edi - add %ecx, %eax - mov %dl, %cl - - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2): - movdqa 16(%eax), %xmm2 - pcmpeqb %xmm1, %xmm2 - pmovmskb %xmm2, %edi - - mov %cl, %ch - - mov %dh, %cl - mov $1, %edx - sal %cl, %edx - sub $1, %edx - - and %edx, %edi - - test %edi, %edi - jnz L(length_less16_part2_return) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edi - - mov %ch, %cl - sar %cl, %edi - test %edi, %edi - jz L(ret_null) - - bsr %edi, %edi - add %edi, %eax - xor %ch, %ch - add %ecx, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(length_less16_part2_return): - bsr %edi, %edi - lea 16(%eax, %edi), %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(ret_null): - xor %eax, %eax - POP (%edi) - ret - -END (__memrchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/memrchr.S b/sysdeps/i386/i686/multiarch/memrchr.S deleted file mode 
100644 index d4253a553b..0000000000 --- a/sysdeps/i386/i686/multiarch/memrchr.S +++ /dev/null @@ -1,45 +0,0 @@ -/* Multiple versions of memrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__memrchr) - .type __memrchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - HAS_CPU_FEATURE (SSE2) - jz 2f - HAS_ARCH_FEATURE (Slow_BSF) - jz 3f - - LOAD_FUNC_GOT_EAX (__memrchr_sse2) - ret - -2: LOAD_FUNC_GOT_EAX (__memrchr_ia32) - ret - -3: LOAD_FUNC_GOT_EAX (__memrchr_sse2_bsf) - ret -END(__memrchr) - -weak_alias(__memrchr, memrchr) -#endif diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S deleted file mode 100644 index 3221077e49..0000000000 --- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +++ /dev/null @@ -1,811 +0,0 @@ -/* memset with SSE2 and REP string. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif - -#ifdef SHARED -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define PARMS 8 /* Preserve EBX. */ -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - add $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - add (%ebx,%ecx,4), %ebx; \ - add %ecx, %edx; \ - /* We loaded the jump table and adjusted EDX. Go. 
*/ \ - jmp *%ebx -#else -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define PARMS 4 -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - add %ecx, %edx; \ - jmp *TABLE(,%ecx,4) -#endif - - .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO -ENTRY (__memset_chk_sse2_rep) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memset_chk_sse2_rep) -#endif -ENTRY (__memset_sse2_rep) - ENTRANCE - - movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else - movzbl CHR(%esp), %eax - movb %al, %ah - /* Fill the whole EAX with pattern. */ - movl %eax, %edx - shl $16, %eax - or %edx, %eax -#endif - movl DEST(%esp), %edx - cmp $32, %ecx - jae L(32bytesormore) - -L(write_less32bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes)) - - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_less_32bytes): - .int JMPTBL (L(write_0bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_1bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_2bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_3bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_4bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_5bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_6bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_7bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_8bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_9bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_10bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_11bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_12bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_13bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_14bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_15bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_16bytes), L(table_less_32bytes)) - .int JMPTBL 
(L(write_17bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_18bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_19bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_20bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_21bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_22bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_23bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_24bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_25bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_26bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_27bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_28bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_29bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_30bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_31bytes), L(table_less_32bytes)) - .popsection - - ALIGN (4) -L(write_28bytes): - movl %eax, -28(%edx) -L(write_24bytes): - movl %eax, -24(%edx) -L(write_20bytes): - movl %eax, -20(%edx) -L(write_16bytes): - movl %eax, -16(%edx) -L(write_12bytes): - movl %eax, -12(%edx) -L(write_8bytes): - movl %eax, -8(%edx) -L(write_4bytes): - movl %eax, -4(%edx) -L(write_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(write_29bytes): - movl %eax, -29(%edx) -L(write_25bytes): - movl %eax, -25(%edx) -L(write_21bytes): - movl %eax, -21(%edx) -L(write_17bytes): - movl %eax, -17(%edx) -L(write_13bytes): - movl %eax, -13(%edx) -L(write_9bytes): - movl %eax, -9(%edx) -L(write_5bytes): - movl %eax, -5(%edx) -L(write_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_30bytes): - movl %eax, -30(%edx) -L(write_26bytes): - movl %eax, -26(%edx) -L(write_22bytes): - movl %eax, -22(%edx) -L(write_18bytes): - movl %eax, -18(%edx) -L(write_14bytes): - movl %eax, -14(%edx) -L(write_10bytes): - movl %eax, -10(%edx) -L(write_6bytes): - movl %eax, -6(%edx) -L(write_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_31bytes): - movl %eax, -31(%edx) -L(write_27bytes): - movl 
%eax, -27(%edx) -L(write_23bytes): - movl %eax, -23(%edx) -L(write_19bytes): - movl %eax, -19(%edx) -L(write_15bytes): - movl %eax, -15(%edx) -L(write_11bytes): - movl %eax, -11(%edx) -L(write_7bytes): - movl %eax, -7(%edx) -L(write_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -/* ECX > 32 and EDX is 4 byte aligned. */ -L(32bytesormore): - /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else - movd %eax, %xmm0 - pshufd $0, %xmm0, %xmm0 -#endif - testl $0xf, %edx - jz L(aligned_16) -/* ECX > 32 and EDX is not 16 byte aligned. */ -L(not_aligned_16): - movdqu %xmm0, (%edx) - movl %edx, %eax - and $-16, %edx - add $16, %edx - sub %edx, %eax - add %eax, %ecx - movd %xmm0, %eax - - ALIGN (4) -L(aligned_16): - cmp $128, %ecx - jae L(128bytesormore) - -L(aligned_16_less128bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - ALIGN (4) -L(128bytesormore): - PUSH (%edi) -#ifdef DATA_CACHE_SIZE - PUSH (%ebx) - mov $DATA_CACHE_SIZE, %ebx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - mov __x86_data_cache_size@GOTOFF(%ebx), %ebx -# else - PUSH (%ebx) - mov __x86_data_cache_size, %ebx -# endif -#endif - mov %ebx, %edi - shr $4, %ebx - sub %ebx, %edi -#if defined DATA_CACHE_SIZE || !defined SHARED - POP (%ebx) -#endif -/* - * When data size approximate the end of L1 cache, - * fast string will prefetch and combine data efficiently. 
- */ - cmp %edi, %ecx - jae L(128bytesormore_endof_L1) - subl $128, %ecx -L(128bytesormore_normal): - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jb L(128bytesless_normal) - - - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jae L(128bytesormore_normal) - -L(128bytesless_normal): - POP (%edi) - add $128, %ecx - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - CFI_PUSH (%edi) - ALIGN (4) -L(128bytesormore_endof_L1): - mov %edx, %edi - mov %ecx, %edx - shr $2, %ecx - and $3, %edx - rep stosl - jz L(copy_page_by_rep_exit) - cmp $2, %edx - jb L(copy_page_by_rep_left_1) - movw %ax, (%edi) - add $2, %edi - sub $2, %edx - jz L(copy_page_by_rep_exit) -L(copy_page_by_rep_left_1): - movb %al, (%edi) -L(copy_page_by_rep_exit): - POP (%edi) - SETRTNVAL - RETURN - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_16_128bytes): - .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_12bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_45bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_78bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_111bytes), 
L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes)) - .popsection - - ALIGN (4) -L(aligned_16_112bytes): - movdqa %xmm0, -112(%edx) -L(aligned_16_96bytes): - movdqa %xmm0, -96(%edx) -L(aligned_16_80bytes): - movdqa %xmm0, -80(%edx) -L(aligned_16_64bytes): - movdqa %xmm0, -64(%edx) -L(aligned_16_48bytes): - movdqa %xmm0, -48(%edx) -L(aligned_16_32bytes): - movdqa %xmm0, -32(%edx) -L(aligned_16_16bytes): - movdqa %xmm0, -16(%edx) -L(aligned_16_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_113bytes): - movdqa %xmm0, -113(%edx) -L(aligned_16_97bytes): - movdqa %xmm0, -97(%edx) -L(aligned_16_81bytes): - movdqa %xmm0, -81(%edx) -L(aligned_16_65bytes): - movdqa %xmm0, -65(%edx) -L(aligned_16_49bytes): - movdqa %xmm0, -49(%edx) -L(aligned_16_33bytes): - movdqa %xmm0, -33(%edx) -L(aligned_16_17bytes): - movdqa %xmm0, -17(%edx) -L(aligned_16_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_114bytes): - movdqa %xmm0, -114(%edx) -L(aligned_16_98bytes): - movdqa %xmm0, -98(%edx) -L(aligned_16_82bytes): - 
movdqa %xmm0, -82(%edx) -L(aligned_16_66bytes): - movdqa %xmm0, -66(%edx) -L(aligned_16_50bytes): - movdqa %xmm0, -50(%edx) -L(aligned_16_34bytes): - movdqa %xmm0, -34(%edx) -L(aligned_16_18bytes): - movdqa %xmm0, -18(%edx) -L(aligned_16_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_115bytes): - movdqa %xmm0, -115(%edx) -L(aligned_16_99bytes): - movdqa %xmm0, -99(%edx) -L(aligned_16_83bytes): - movdqa %xmm0, -83(%edx) -L(aligned_16_67bytes): - movdqa %xmm0, -67(%edx) -L(aligned_16_51bytes): - movdqa %xmm0, -51(%edx) -L(aligned_16_35bytes): - movdqa %xmm0, -35(%edx) -L(aligned_16_19bytes): - movdqa %xmm0, -19(%edx) -L(aligned_16_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_116bytes): - movdqa %xmm0, -116(%edx) -L(aligned_16_100bytes): - movdqa %xmm0, -100(%edx) -L(aligned_16_84bytes): - movdqa %xmm0, -84(%edx) -L(aligned_16_68bytes): - movdqa %xmm0, -68(%edx) -L(aligned_16_52bytes): - movdqa %xmm0, -52(%edx) -L(aligned_16_36bytes): - movdqa %xmm0, -36(%edx) -L(aligned_16_20bytes): - movdqa %xmm0, -20(%edx) -L(aligned_16_4bytes): - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_117bytes): - movdqa %xmm0, -117(%edx) -L(aligned_16_101bytes): - movdqa %xmm0, -101(%edx) -L(aligned_16_85bytes): - movdqa %xmm0, -85(%edx) -L(aligned_16_69bytes): - movdqa %xmm0, -69(%edx) -L(aligned_16_53bytes): - movdqa %xmm0, -53(%edx) -L(aligned_16_37bytes): - movdqa %xmm0, -37(%edx) -L(aligned_16_21bytes): - movdqa %xmm0, -21(%edx) -L(aligned_16_5bytes): - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_118bytes): - movdqa %xmm0, -118(%edx) -L(aligned_16_102bytes): - movdqa %xmm0, -102(%edx) -L(aligned_16_86bytes): - movdqa %xmm0, -86(%edx) -L(aligned_16_70bytes): - movdqa %xmm0, -70(%edx) -L(aligned_16_54bytes): - movdqa %xmm0, -54(%edx) -L(aligned_16_38bytes): - movdqa %xmm0, -38(%edx) -L(aligned_16_22bytes): - movdqa %xmm0, -22(%edx) 
-L(aligned_16_6bytes): - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_119bytes): - movdqa %xmm0, -119(%edx) -L(aligned_16_103bytes): - movdqa %xmm0, -103(%edx) -L(aligned_16_87bytes): - movdqa %xmm0, -87(%edx) -L(aligned_16_71bytes): - movdqa %xmm0, -71(%edx) -L(aligned_16_55bytes): - movdqa %xmm0, -55(%edx) -L(aligned_16_39bytes): - movdqa %xmm0, -39(%edx) -L(aligned_16_23bytes): - movdqa %xmm0, -23(%edx) -L(aligned_16_7bytes): - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_120bytes): - movdqa %xmm0, -120(%edx) -L(aligned_16_104bytes): - movdqa %xmm0, -104(%edx) -L(aligned_16_88bytes): - movdqa %xmm0, -88(%edx) -L(aligned_16_72bytes): - movdqa %xmm0, -72(%edx) -L(aligned_16_56bytes): - movdqa %xmm0, -56(%edx) -L(aligned_16_40bytes): - movdqa %xmm0, -40(%edx) -L(aligned_16_24bytes): - movdqa %xmm0, -24(%edx) -L(aligned_16_8bytes): - movq %xmm0, -8(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_121bytes): - movdqa %xmm0, -121(%edx) -L(aligned_16_105bytes): - movdqa %xmm0, -105(%edx) -L(aligned_16_89bytes): - movdqa %xmm0, -89(%edx) -L(aligned_16_73bytes): - movdqa %xmm0, -73(%edx) -L(aligned_16_57bytes): - movdqa %xmm0, -57(%edx) -L(aligned_16_41bytes): - movdqa %xmm0, -41(%edx) -L(aligned_16_25bytes): - movdqa %xmm0, -25(%edx) -L(aligned_16_9bytes): - movq %xmm0, -9(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_122bytes): - movdqa %xmm0, -122(%edx) -L(aligned_16_106bytes): - movdqa %xmm0, -106(%edx) -L(aligned_16_90bytes): - movdqa %xmm0, -90(%edx) -L(aligned_16_74bytes): - movdqa %xmm0, -74(%edx) -L(aligned_16_58bytes): - movdqa %xmm0, -58(%edx) -L(aligned_16_42bytes): - movdqa %xmm0, -42(%edx) -L(aligned_16_26bytes): - movdqa %xmm0, -26(%edx) -L(aligned_16_10bytes): - movq %xmm0, -10(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_123bytes): - movdqa %xmm0, -123(%edx) 
-L(aligned_16_107bytes): - movdqa %xmm0, -107(%edx) -L(aligned_16_91bytes): - movdqa %xmm0, -91(%edx) -L(aligned_16_75bytes): - movdqa %xmm0, -75(%edx) -L(aligned_16_59bytes): - movdqa %xmm0, -59(%edx) -L(aligned_16_43bytes): - movdqa %xmm0, -43(%edx) -L(aligned_16_27bytes): - movdqa %xmm0, -27(%edx) -L(aligned_16_11bytes): - movq %xmm0, -11(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_124bytes): - movdqa %xmm0, -124(%edx) -L(aligned_16_108bytes): - movdqa %xmm0, -108(%edx) -L(aligned_16_92bytes): - movdqa %xmm0, -92(%edx) -L(aligned_16_76bytes): - movdqa %xmm0, -76(%edx) -L(aligned_16_60bytes): - movdqa %xmm0, -60(%edx) -L(aligned_16_44bytes): - movdqa %xmm0, -44(%edx) -L(aligned_16_28bytes): - movdqa %xmm0, -28(%edx) -L(aligned_16_12bytes): - movq %xmm0, -12(%edx) - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_125bytes): - movdqa %xmm0, -125(%edx) -L(aligned_16_109bytes): - movdqa %xmm0, -109(%edx) -L(aligned_16_93bytes): - movdqa %xmm0, -93(%edx) -L(aligned_16_77bytes): - movdqa %xmm0, -77(%edx) -L(aligned_16_61bytes): - movdqa %xmm0, -61(%edx) -L(aligned_16_45bytes): - movdqa %xmm0, -45(%edx) -L(aligned_16_29bytes): - movdqa %xmm0, -29(%edx) -L(aligned_16_13bytes): - movq %xmm0, -13(%edx) - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_126bytes): - movdqa %xmm0, -126(%edx) -L(aligned_16_110bytes): - movdqa %xmm0, -110(%edx) -L(aligned_16_94bytes): - movdqa %xmm0, -94(%edx) -L(aligned_16_78bytes): - movdqa %xmm0, -78(%edx) -L(aligned_16_62bytes): - movdqa %xmm0, -62(%edx) -L(aligned_16_46bytes): - movdqa %xmm0, -46(%edx) -L(aligned_16_30bytes): - movdqa %xmm0, -30(%edx) -L(aligned_16_14bytes): - movq %xmm0, -14(%edx) - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_127bytes): - movdqa %xmm0, -127(%edx) -L(aligned_16_111bytes): - movdqa %xmm0, -111(%edx) -L(aligned_16_95bytes): - movdqa %xmm0, -95(%edx) 
-L(aligned_16_79bytes): - movdqa %xmm0, -79(%edx) -L(aligned_16_63bytes): - movdqa %xmm0, -63(%edx) -L(aligned_16_47bytes): - movdqa %xmm0, -47(%edx) -L(aligned_16_31bytes): - movdqa %xmm0, -31(%edx) -L(aligned_16_15bytes): - movq %xmm0, -15(%edx) - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN_END - -END (__memset_sse2_rep) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S deleted file mode 100644 index d7b8be9114..0000000000 --- a/sysdeps/i386/i686/multiarch/memset-sse2.S +++ /dev/null @@ -1,860 +0,0 @@ -/* memset with SSE2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" -/* Unwind annotation for a 4-byte push: grow the CFA offset by 4 and record REG as saved at offset 0. */ -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) -/* Unwind annotation for a 4-byte pop: shrink the CFA offset by 4 and mark REG as restored. */ -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) -/* Push or pop REG and emit the matching unwind annotation. */ -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) -/* Stack offsets of the arguments, relative to PARMS. The USE_AS_BZERO build has no CHR argument and an empty SETRTNVAL; the other build reloads DEST into EAX as the return value. */ -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif -/* In the SHARED build ENTRANCE pushes EBX and RETURN_END pops it before ret, which is why PARMS is 8 there and 4 otherwise. RETURN also emits CFI_PUSH (%ebx) after the ret; NOTE(review): presumably so that code placed after the ret is still described with EBX saved - confirm. */ -#ifdef SHARED -# define ENTRANCE PUSH (%ebx); -# define RETURN_END POP (%ebx); ret -# define RETURN RETURN_END; CFI_PUSH (%ebx) -# define PARMS 8 /* Preserve EBX. */ -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into EBX and branch to it. TABLE is a - jump table with relative offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - /* We first load PC into EBX. */ \ - SETUP_PIC_REG(bx); \ - /* Get the address of the jump table. */ \ - add $(TABLE - .), %ebx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - add (%ebx,%ecx,4), %ebx; \ - add %ecx, %edx; \ - /* We loaded the jump table and adjusted EDX. Go. */ \ - jmp *%ebx -#else -# define ENTRANCE -# define RETURN_END ret -# define RETURN RETURN_END -# define PARMS 4 -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. */ -# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \ - add %ecx, %edx; \ - jmp *TABLE(,%ecx,4) -#endif -/* __memset_chk_sse2 (SHARED libc, non-bzero build only): loads the value at 12(%esp) and jumps to __chk_fail when the value at 16(%esp) is below it (unsigned compare). There is no ret before END, so otherwise control reaches the ENTRY (__memset_sse2) that follows. NOTE(review): 12(%esp) and 16(%esp) are presumably the length and the destination object size - confirm against the __memset_chk prototype. */ - .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO -ENTRY (__memset_chk_sse2) - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb HIDDEN_JUMPTARGET (__chk_fail) -END (__memset_chk_sse2) -#endif -ENTRY (__memset_sse2) - ENTRANCE - - movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else - movzbl CHR(%esp), %eax - movb %al, %ah - /* Fill the whole EAX with pattern.
*/ - movl %eax, %edx - shl $16, %eax - or %edx, %eax -#endif - movl DEST(%esp), %edx - cmp $32, %ecx - jae L(32bytesormore) - -L(write_less32bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes)) - - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_less_32bytes): - .int JMPTBL (L(write_0bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_1bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_2bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_3bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_4bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_5bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_6bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_7bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_8bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_9bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_10bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_11bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_12bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_13bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_14bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_15bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_16bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_17bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_18bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_19bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_20bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_21bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_22bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_23bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_24bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_25bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_26bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_27bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_28bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_29bytes), L(table_less_32bytes)) - .int JMPTBL (L(write_30bytes), 
L(table_less_32bytes)) - .int JMPTBL (L(write_31bytes), L(table_less_32bytes)) - .popsection - - ALIGN (4) -L(write_28bytes): - movl %eax, -28(%edx) -L(write_24bytes): - movl %eax, -24(%edx) -L(write_20bytes): - movl %eax, -20(%edx) -L(write_16bytes): - movl %eax, -16(%edx) -L(write_12bytes): - movl %eax, -12(%edx) -L(write_8bytes): - movl %eax, -8(%edx) -L(write_4bytes): - movl %eax, -4(%edx) -L(write_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(write_29bytes): - movl %eax, -29(%edx) -L(write_25bytes): - movl %eax, -25(%edx) -L(write_21bytes): - movl %eax, -21(%edx) -L(write_17bytes): - movl %eax, -17(%edx) -L(write_13bytes): - movl %eax, -13(%edx) -L(write_9bytes): - movl %eax, -9(%edx) -L(write_5bytes): - movl %eax, -5(%edx) -L(write_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_30bytes): - movl %eax, -30(%edx) -L(write_26bytes): - movl %eax, -26(%edx) -L(write_22bytes): - movl %eax, -22(%edx) -L(write_18bytes): - movl %eax, -18(%edx) -L(write_14bytes): - movl %eax, -14(%edx) -L(write_10bytes): - movl %eax, -10(%edx) -L(write_6bytes): - movl %eax, -6(%edx) -L(write_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(write_31bytes): - movl %eax, -31(%edx) -L(write_27bytes): - movl %eax, -27(%edx) -L(write_23bytes): - movl %eax, -23(%edx) -L(write_19bytes): - movl %eax, -19(%edx) -L(write_15bytes): - movl %eax, -15(%edx) -L(write_11bytes): - movl %eax, -11(%edx) -L(write_7bytes): - movl %eax, -7(%edx) -L(write_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -/* ECX > 32 and EDX is 4 byte aligned. */ -L(32bytesormore): - /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else - movd %eax, %xmm0 - pshufd $0, %xmm0, %xmm0 -#endif - testl $0xf, %edx - jz L(aligned_16) -/* ECX > 32 and EDX is not 16 byte aligned. 
*/ -L(not_aligned_16): - movdqu %xmm0, (%edx) - movl %edx, %eax - and $-16, %edx - add $16, %edx - sub %edx, %eax - add %eax, %ecx - movd %xmm0, %eax - - ALIGN (4) -L(aligned_16): - cmp $128, %ecx - jae L(128bytesormore) - -L(aligned_16_less128bytes): - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - ALIGN (4) -L(128bytesormore): -#ifdef SHARED_CACHE_SIZE - PUSH (%ebx) - mov $SHARED_CACHE_SIZE, %ebx -#else -# ifdef SHARED - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx -# else - PUSH (%ebx) - mov __x86_shared_cache_size, %ebx -# endif -#endif - cmp %ebx, %ecx - jae L(128bytesormore_nt_start) - - -#ifdef DATA_CACHE_SIZE - POP (%ebx) -# define RESTORE_EBX_STATE CFI_PUSH (%ebx) - cmp $DATA_CACHE_SIZE, %ecx -#else -# ifdef SHARED -# define RESTORE_EBX_STATE - SETUP_PIC_REG(bx) - add $_GLOBAL_OFFSET_TABLE_, %ebx - cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx -# else - POP (%ebx) -# define RESTORE_EBX_STATE CFI_PUSH (%ebx) - cmp __x86_data_cache_size, %ecx -# endif -#endif - - jae L(128bytes_L2_normal) - subl $128, %ecx -L(128bytesormore_normal): - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jb L(128bytesless_normal) - - - sub $128, %ecx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - lea 128(%edx), %edx - jae L(128bytesormore_normal) - -L(128bytesless_normal): - add $128, %ecx - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - ALIGN (4) -L(128bytes_L2_normal): - prefetcht0 0x380(%edx) - prefetcht0 0x3c0(%edx) - sub $128, %ecx - movdqa %xmm0, (%edx) - movaps %xmm0, 0x10(%edx) - movaps %xmm0, 0x20(%edx) - movaps %xmm0, 0x30(%edx) - movaps %xmm0, 
0x40(%edx) - movaps %xmm0, 0x50(%edx) - movaps %xmm0, 0x60(%edx) - movaps %xmm0, 0x70(%edx) - add $128, %edx - cmp $128, %ecx - jae L(128bytes_L2_normal) - -L(128bytesless_L2_normal): - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - RESTORE_EBX_STATE -L(128bytesormore_nt_start): - sub %ebx, %ecx - ALIGN (4) -L(128bytesormore_shared_cache_loop): - prefetcht0 0x3c0(%edx) - prefetcht0 0x380(%edx) - sub $0x80, %ebx - movdqa %xmm0, (%edx) - movdqa %xmm0, 0x10(%edx) - movdqa %xmm0, 0x20(%edx) - movdqa %xmm0, 0x30(%edx) - movdqa %xmm0, 0x40(%edx) - movdqa %xmm0, 0x50(%edx) - movdqa %xmm0, 0x60(%edx) - movdqa %xmm0, 0x70(%edx) - add $0x80, %edx - cmp $0x80, %ebx - jae L(128bytesormore_shared_cache_loop) - cmp $0x80, %ecx - jb L(shared_cache_loop_end) - ALIGN (4) -L(128bytesormore_nt): - sub $0x80, %ecx - movntdq %xmm0, (%edx) - movntdq %xmm0, 0x10(%edx) - movntdq %xmm0, 0x20(%edx) - movntdq %xmm0, 0x30(%edx) - movntdq %xmm0, 0x40(%edx) - movntdq %xmm0, 0x50(%edx) - movntdq %xmm0, 0x60(%edx) - movntdq %xmm0, 0x70(%edx) - add $0x80, %edx - cmp $0x80, %ecx - jae L(128bytesormore_nt) - sfence -L(shared_cache_loop_end): -#if defined DATA_CACHE_SIZE || !defined SHARED - POP (%ebx) -#endif - BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) - - - .pushsection .rodata.sse2,"a",@progbits - ALIGN (2) -L(table_16_128bytes): - .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_11bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_44bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_77bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes)) - .int JMPTBL 
(L(aligned_16_110bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes)) - .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes)) - .popsection - - ALIGN (4) -L(aligned_16_112bytes): - movdqa %xmm0, -112(%edx) -L(aligned_16_96bytes): - movdqa %xmm0, -96(%edx) -L(aligned_16_80bytes): - movdqa %xmm0, -80(%edx) -L(aligned_16_64bytes): - movdqa %xmm0, -64(%edx) -L(aligned_16_48bytes): - movdqa %xmm0, -48(%edx) -L(aligned_16_32bytes): - movdqa %xmm0, -32(%edx) -L(aligned_16_16bytes): - movdqa %xmm0, -16(%edx) -L(aligned_16_0bytes): - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_113bytes): - movdqa %xmm0, -113(%edx) -L(aligned_16_97bytes): - movdqa %xmm0, -97(%edx) -L(aligned_16_81bytes): - movdqa %xmm0, -81(%edx) -L(aligned_16_65bytes): - movdqa %xmm0, -65(%edx) -L(aligned_16_49bytes): - movdqa %xmm0, -49(%edx) -L(aligned_16_33bytes): - movdqa %xmm0, -33(%edx) -L(aligned_16_17bytes): - movdqa %xmm0, -17(%edx) -L(aligned_16_1bytes): - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_114bytes): - movdqa %xmm0, 
-114(%edx) -L(aligned_16_98bytes): - movdqa %xmm0, -98(%edx) -L(aligned_16_82bytes): - movdqa %xmm0, -82(%edx) -L(aligned_16_66bytes): - movdqa %xmm0, -66(%edx) -L(aligned_16_50bytes): - movdqa %xmm0, -50(%edx) -L(aligned_16_34bytes): - movdqa %xmm0, -34(%edx) -L(aligned_16_18bytes): - movdqa %xmm0, -18(%edx) -L(aligned_16_2bytes): - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_115bytes): - movdqa %xmm0, -115(%edx) -L(aligned_16_99bytes): - movdqa %xmm0, -99(%edx) -L(aligned_16_83bytes): - movdqa %xmm0, -83(%edx) -L(aligned_16_67bytes): - movdqa %xmm0, -67(%edx) -L(aligned_16_51bytes): - movdqa %xmm0, -51(%edx) -L(aligned_16_35bytes): - movdqa %xmm0, -35(%edx) -L(aligned_16_19bytes): - movdqa %xmm0, -19(%edx) -L(aligned_16_3bytes): - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_116bytes): - movdqa %xmm0, -116(%edx) -L(aligned_16_100bytes): - movdqa %xmm0, -100(%edx) -L(aligned_16_84bytes): - movdqa %xmm0, -84(%edx) -L(aligned_16_68bytes): - movdqa %xmm0, -68(%edx) -L(aligned_16_52bytes): - movdqa %xmm0, -52(%edx) -L(aligned_16_36bytes): - movdqa %xmm0, -36(%edx) -L(aligned_16_20bytes): - movdqa %xmm0, -20(%edx) -L(aligned_16_4bytes): - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_117bytes): - movdqa %xmm0, -117(%edx) -L(aligned_16_101bytes): - movdqa %xmm0, -101(%edx) -L(aligned_16_85bytes): - movdqa %xmm0, -85(%edx) -L(aligned_16_69bytes): - movdqa %xmm0, -69(%edx) -L(aligned_16_53bytes): - movdqa %xmm0, -53(%edx) -L(aligned_16_37bytes): - movdqa %xmm0, -37(%edx) -L(aligned_16_21bytes): - movdqa %xmm0, -21(%edx) -L(aligned_16_5bytes): - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_118bytes): - movdqa %xmm0, -118(%edx) -L(aligned_16_102bytes): - movdqa %xmm0, -102(%edx) -L(aligned_16_86bytes): - movdqa %xmm0, -86(%edx) -L(aligned_16_70bytes): - movdqa %xmm0, -70(%edx) -L(aligned_16_54bytes): - movdqa %xmm0, -54(%edx) 
-L(aligned_16_38bytes): - movdqa %xmm0, -38(%edx) -L(aligned_16_22bytes): - movdqa %xmm0, -22(%edx) -L(aligned_16_6bytes): - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_119bytes): - movdqa %xmm0, -119(%edx) -L(aligned_16_103bytes): - movdqa %xmm0, -103(%edx) -L(aligned_16_87bytes): - movdqa %xmm0, -87(%edx) -L(aligned_16_71bytes): - movdqa %xmm0, -71(%edx) -L(aligned_16_55bytes): - movdqa %xmm0, -55(%edx) -L(aligned_16_39bytes): - movdqa %xmm0, -39(%edx) -L(aligned_16_23bytes): - movdqa %xmm0, -23(%edx) -L(aligned_16_7bytes): - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_120bytes): - movdqa %xmm0, -120(%edx) -L(aligned_16_104bytes): - movdqa %xmm0, -104(%edx) -L(aligned_16_88bytes): - movdqa %xmm0, -88(%edx) -L(aligned_16_72bytes): - movdqa %xmm0, -72(%edx) -L(aligned_16_56bytes): - movdqa %xmm0, -56(%edx) -L(aligned_16_40bytes): - movdqa %xmm0, -40(%edx) -L(aligned_16_24bytes): - movdqa %xmm0, -24(%edx) -L(aligned_16_8bytes): - movq %xmm0, -8(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_121bytes): - movdqa %xmm0, -121(%edx) -L(aligned_16_105bytes): - movdqa %xmm0, -105(%edx) -L(aligned_16_89bytes): - movdqa %xmm0, -89(%edx) -L(aligned_16_73bytes): - movdqa %xmm0, -73(%edx) -L(aligned_16_57bytes): - movdqa %xmm0, -57(%edx) -L(aligned_16_41bytes): - movdqa %xmm0, -41(%edx) -L(aligned_16_25bytes): - movdqa %xmm0, -25(%edx) -L(aligned_16_9bytes): - movq %xmm0, -9(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_122bytes): - movdqa %xmm0, -122(%edx) -L(aligned_16_106bytes): - movdqa %xmm0, -106(%edx) -L(aligned_16_90bytes): - movdqa %xmm0, -90(%edx) -L(aligned_16_74bytes): - movdqa %xmm0, -74(%edx) -L(aligned_16_58bytes): - movdqa %xmm0, -58(%edx) -L(aligned_16_42bytes): - movdqa %xmm0, -42(%edx) -L(aligned_16_26bytes): - movdqa %xmm0, -26(%edx) -L(aligned_16_10bytes): - movq %xmm0, -10(%edx) - movw %ax, -2(%edx) - 
SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_123bytes): - movdqa %xmm0, -123(%edx) -L(aligned_16_107bytes): - movdqa %xmm0, -107(%edx) -L(aligned_16_91bytes): - movdqa %xmm0, -91(%edx) -L(aligned_16_75bytes): - movdqa %xmm0, -75(%edx) -L(aligned_16_59bytes): - movdqa %xmm0, -59(%edx) -L(aligned_16_43bytes): - movdqa %xmm0, -43(%edx) -L(aligned_16_27bytes): - movdqa %xmm0, -27(%edx) -L(aligned_16_11bytes): - movq %xmm0, -11(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_124bytes): - movdqa %xmm0, -124(%edx) -L(aligned_16_108bytes): - movdqa %xmm0, -108(%edx) -L(aligned_16_92bytes): - movdqa %xmm0, -92(%edx) -L(aligned_16_76bytes): - movdqa %xmm0, -76(%edx) -L(aligned_16_60bytes): - movdqa %xmm0, -60(%edx) -L(aligned_16_44bytes): - movdqa %xmm0, -44(%edx) -L(aligned_16_28bytes): - movdqa %xmm0, -28(%edx) -L(aligned_16_12bytes): - movq %xmm0, -12(%edx) - movl %eax, -4(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_125bytes): - movdqa %xmm0, -125(%edx) -L(aligned_16_109bytes): - movdqa %xmm0, -109(%edx) -L(aligned_16_93bytes): - movdqa %xmm0, -93(%edx) -L(aligned_16_77bytes): - movdqa %xmm0, -77(%edx) -L(aligned_16_61bytes): - movdqa %xmm0, -61(%edx) -L(aligned_16_45bytes): - movdqa %xmm0, -45(%edx) -L(aligned_16_29bytes): - movdqa %xmm0, -29(%edx) -L(aligned_16_13bytes): - movq %xmm0, -13(%edx) - movl %eax, -5(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_126bytes): - movdqa %xmm0, -126(%edx) -L(aligned_16_110bytes): - movdqa %xmm0, -110(%edx) -L(aligned_16_94bytes): - movdqa %xmm0, -94(%edx) -L(aligned_16_78bytes): - movdqa %xmm0, -78(%edx) -L(aligned_16_62bytes): - movdqa %xmm0, -62(%edx) -L(aligned_16_46bytes): - movdqa %xmm0, -46(%edx) -L(aligned_16_30bytes): - movdqa %xmm0, -30(%edx) -L(aligned_16_14bytes): - movq %xmm0, -14(%edx) - movl %eax, -6(%edx) - movw %ax, -2(%edx) - SETRTNVAL - RETURN - - ALIGN (4) -L(aligned_16_127bytes): - movdqa %xmm0, -127(%edx) 
-L(aligned_16_111bytes): - movdqa %xmm0, -111(%edx) -L(aligned_16_95bytes): - movdqa %xmm0, -95(%edx) -L(aligned_16_79bytes): - movdqa %xmm0, -79(%edx) -L(aligned_16_63bytes): - movdqa %xmm0, -63(%edx) -L(aligned_16_47bytes): - movdqa %xmm0, -47(%edx) -L(aligned_16_31bytes): - movdqa %xmm0, -31(%edx) -L(aligned_16_15bytes): - movq %xmm0, -15(%edx) - movl %eax, -7(%edx) - movw %ax, -3(%edx) - movb %al, -1(%edx) - SETRTNVAL - RETURN_END - -END (__memset_sse2) - -#endif diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S deleted file mode 100644 index f601663a9f..0000000000 --- a/sysdeps/i386/i686/multiarch/memset.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Multiple versions of memset - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. 
*/ -#if IS_IN (libc) - .text -ENTRY(memset) - .type memset, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memset_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_sse2) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_sse2_rep) -2: ret -END(memset) - -# undef ENTRY -# define ENTRY(name) \ - .type __memset_ia32, @function; \ - .globl __memset_ia32; \ - .p2align 4; \ - __memset_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memset_ia32, .-__memset_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memset_chk_ia32, @function; \ - .globl __memset_chk_ia32; \ - .p2align 4; \ - __memset_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memset_chk_ia32, .-__memset_chk_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memset; __GI_memset = __memset_ia32 -# endif - -# undef strong_alias -# define strong_alias(original, alias) -#endif - -#include "../memset.S" diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S deleted file mode 100644 index 573cf4208a..0000000000 --- a/sysdeps/i386/i686/multiarch/memset_chk.S +++ /dev/null @@ -1,82 +0,0 @@ -/* Multiple versions of __memset_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#if IS_IN (libc) - .text -ENTRY(__memset_chk) - .type __memset_chk, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__memset_chk_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_chk_sse2) - HAS_ARCH_FEATURE (Fast_Rep_String) - jz 2f - LOAD_FUNC_GOT_EAX (__memset_chk_sse2_rep) -2: ret -END(__memset_chk) - -# ifdef SHARED -strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -# else - .text - .type __memset_chk_sse2, @function - .p2align 4; -__memset_chk_sse2: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memset_sse2 - cfi_endproc - .size __memset_chk_sse2, .-__memset_chk_sse2 - - .type __memset_chk_sse2_rep, @function - .p2align 4; -__memset_chk_sse2_rep: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memset_sse2_rep - cfi_endproc - .size __memset_chk_sse2_rep, .-__memset_chk_sse2_rep - - .type __memset_chk_ia32, @function - .p2align 4; -__memset_chk_ia32: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memset_ia32 - cfi_endproc - .size __memset_chk_ia32, .-__memset_chk_ia32 -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S 
b/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S deleted file mode 100644 index 88c0e5776c..0000000000 --- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_RAWMEMCHR -#define MEMCHR __rawmemchr_sse2_bsf -#include "memchr-sse2-bsf.S" diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S b/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S deleted file mode 100644 index 038c74896b..0000000000 --- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_RAWMEMCHR -#define MEMCHR __rawmemchr_sse2 -#include "memchr-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/rawmemchr.S b/sysdeps/i386/i686/multiarch/rawmemchr.S deleted file mode 100644 index 0a41d63ee8..0000000000 --- a/sysdeps/i386/i686/multiarch/rawmemchr.S +++ /dev/null @@ -1,65 +0,0 @@ -/* Multiple versions of rawmemchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__rawmemchr) - .type __rawmemchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - HAS_CPU_FEATURE (SSE2) - jz 2f - HAS_ARCH_FEATURE (Slow_BSF) - jz 3f - - LOAD_FUNC_GOT_EAX (__rawmemchr_sse2) - ret - -2: LOAD_FUNC_GOT_EAX (__rawmemchr_ia32) - ret - -3: LOAD_FUNC_GOT_EAX (__rawmemchr_sse2_bsf) - ret -END(__rawmemchr) - -weak_alias(__rawmemchr, rawmemchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __rawmemchr_ia32, @function; \ - .globl __rawmemchr_ia32; \ - .p2align 4; \ - __rawmemchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __rawmemchr_ia32, .-__rawmemchr_ia32 - -# undef libc_hidden_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_def(name) \ - .globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_ia32 - -#endif -#include "../../rawmemchr.S" diff --git a/sysdeps/i386/i686/multiarch/rtld-strnlen.c b/sysdeps/i386/i686/multiarch/rtld-strnlen.c deleted file mode 100644 index 1aa5440644..0000000000 --- a/sysdeps/i386/i686/multiarch/rtld-strnlen.c +++ /dev/null @@ -1 +0,0 @@ -#include <string/strnlen.c> diff --git a/sysdeps/i386/i686/multiarch/s_fma-fma.c b/sysdeps/i386/i686/multiarch/s_fma-fma.c deleted file mode 100644 index 2e9619f97c..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fma-fma.c +++ /dev/null @@ -1,27 +0,0 @@ -/* FMA version of fma. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -double -__fma_fma (double x, double y, double z) -{ - asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); - return x; -} diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c deleted file mode 100644 index 411ebb2ba9..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fma.c +++ /dev/null @@ -1,34 +0,0 @@ -/* Multiple versions of fma. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -#include <math.h> -#include <init-arch.h> - -extern double __fma_ia32 (double x, double y, double z) attribute_hidden; -extern double __fma_fma (double x, double y, double z) attribute_hidden; - -libm_ifunc (__fma, - HAS_ARCH_FEATURE (FMA_Usable) ? 
__fma_fma : __fma_ia32); -weak_alias (__fma, fma) - -#define __fma __fma_ia32 - -#include <sysdeps/ieee754/ldbl-96/s_fma.c> diff --git a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c deleted file mode 100644 index ee57abfda2..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c +++ /dev/null @@ -1,27 +0,0 @@ -/* FMA version of fmaf. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -float -__fmaf_fma (float x, float y, float z) -{ - asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); - return x; -} diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c deleted file mode 100644 index 00b0fbcfc5..0000000000 --- a/sysdeps/i386/i686/multiarch/s_fmaf.c +++ /dev/null @@ -1,34 +0,0 @@ -/* Multiple versions of fmaf. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> - -#include <math.h> -#include <init-arch.h> - -extern float __fmaf_ia32 (float x, float y, float z) attribute_hidden; -extern float __fmaf_fma (float x, float y, float z) attribute_hidden; - -libm_ifunc (__fmaf, - HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma : __fmaf_ia32); -weak_alias (__fmaf, fmaf) - -#define __fmaf __fmaf_ia32 - -#include <sysdeps/ieee754/dbl-64/s_fmaf.c> diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c deleted file mode 100644 index 7db31b02f8..0000000000 --- a/sysdeps/i386/i686/multiarch/sched_cpucount.c +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/sched_cpucount.c> diff --git a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S b/sysdeps/i386/i686/multiarch/stpcpy-sse2.S deleted file mode 100644 index 46ca1b3074..0000000000 --- a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STPCPY -#define STRCPY __stpcpy_sse2 -#include "strcpy-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S deleted file mode 100644 index d971c2da38..0000000000 --- a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STPCPY -#define STRCPY __stpcpy_ssse3 -#include "strcpy-ssse3.S" diff 
--git a/sysdeps/i386/i686/multiarch/stpcpy.S b/sysdeps/i386/i686/multiarch/stpcpy.S deleted file mode 100644 index ee81ab6ae3..0000000000 --- a/sysdeps/i386/i686/multiarch/stpcpy.S +++ /dev/null @@ -1,9 +0,0 @@ -/* Multiple versions of stpcpy - All versions must be listed in ifunc-impl-list.c. */ -#define USE_AS_STPCPY -#define STRCPY __stpcpy -#include "strcpy.S" - -weak_alias (__stpcpy, stpcpy) -libc_hidden_def (__stpcpy) -libc_hidden_builtin_def (stpcpy) diff --git a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S b/sysdeps/i386/i686/multiarch/stpncpy-sse2.S deleted file mode 100644 index 37a703cb76..0000000000 --- a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_STPCPY -#define USE_AS_STRNCPY -#define STRCPY __stpncpy_sse2 -#include "strcpy-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S deleted file mode 100644 index 14ed16f6b5..0000000000 --- a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_STPCPY -#define USE_AS_STRNCPY -#define STRCPY __stpncpy_ssse3 -#include "strcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/stpncpy.S b/sysdeps/i386/i686/multiarch/stpncpy.S deleted file mode 100644 index 2698ca6a8c..0000000000 --- a/sysdeps/i386/i686/multiarch/stpncpy.S +++ /dev/null @@ -1,8 +0,0 @@ -/* Multiple versions of stpncpy - All versions must be listed in ifunc-impl-list.c. 
*/ -#define STRCPY __stpncpy -#define USE_AS_STPCPY -#define USE_AS_STRNCPY -#include "strcpy.S" - -weak_alias (__stpncpy, stpncpy) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp-c.c b/sysdeps/i386/i686/multiarch/strcasecmp-c.c deleted file mode 100644 index 753c6ec84a..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp-c.c +++ /dev/null @@ -1,12 +0,0 @@ -#include <string.h> - -extern __typeof (strcasecmp) __strcasecmp_nonascii; - -#define __strcasecmp __strcasecmp_nonascii -#include <string/strcasecmp.c> - -strong_alias (__strcasecmp_nonascii, __strcasecmp_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. */ -strong_alias (__strcasecmp_nonascii, __GI___strcasecmp) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S deleted file mode 100644 index ec59276408..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Entry point for multi-version x86 strcasecmp. - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY(__strcasecmp) - .type __strcasecmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strcasecmp_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__strcasecmp_ssse3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - HAS_ARCH_FEATURE (Slow_SSE4_2) - jnz 2f - LOAD_FUNC_GOT_EAX (__strcasecmp_sse4_2) -2: ret -END(__strcasecmp) - -weak_alias (__strcasecmp, strcasecmp) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c b/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c deleted file mode 100644 index d4fcd2b4a1..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <string.h> - -extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii; - -#define __strcasecmp_l __strcasecmp_l_nonascii -#define USE_IN_EXTENDED_LOCALE_MODEL 1 -#include <string/strcasecmp.c> - -strong_alias (__strcasecmp_l_nonascii, __strcasecmp_l_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. 
*/ -strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S deleted file mode 100644 index 411d4153f2..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRCASECMP_L 1 -#include "strcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S deleted file mode 100644 index a22b93c518..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRCASECMP_L 1 -#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l.S b/sysdeps/i386/i686/multiarch/strcasecmp_l.S deleted file mode 100644 index 711c09b0dc..0000000000 --- a/sysdeps/i386/i686/multiarch/strcasecmp_l.S +++ /dev/null @@ -1,7 +0,0 @@ -/* Multiple versions of strcasecmp_l - All versions must be listed in ifunc-impl-list.c. */ -#define STRCMP __strcasecmp_l -#define USE_AS_STRCASECMP_L -#include "strcmp.S" - -weak_alias (__strcasecmp_l, strcasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S deleted file mode 100644 index 6359c7330c..0000000000 --- a/sysdeps/i386/i686/multiarch/strcat-sse2.S +++ /dev/null @@ -1,1245 +0,0 @@ -/* strcat with SSE2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# include <sysdep.h> - - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into ECX and branch to it. TABLE is a - jump table with relative offsets. INDEX is a register contains the - index into the jump table. SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into ECX. */ \ - SETUP_PIC_REG(cx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ecx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ecx,INDEX,SCALE), %ecx; \ - /* We loaded the jump table and adjusted ECX. Go. */ \ - jmp *%ecx -# else -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - -# ifndef STRCAT -# define STRCAT __strcat_sse2 -# endif - -# define PARMS 4 -# define STR1 PARMS+4 -# define STR2 STR1+4 - -# ifdef USE_AS_STRNCAT -# define LEN STR2+8 -# define STR3 STR1+4 -# else -# define STR3 STR1 -# endif - -# define USE_AS_STRCAT -# ifdef USE_AS_STRNCAT -# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); -# else -# define RETURN POP(%esi); ret; CFI_PUSH(%esi); -# endif - -.text -ENTRY (STRCAT) - PUSH (%esi) - mov STR1(%esp), %eax - mov STR2(%esp), %esi -# ifdef USE_AS_STRNCAT - PUSH (%ebx) - movl LEN(%esp), %ebx - test %ebx, %ebx - jz L(ExitZero) -# endif - cmpb $0, (%esi) - mov %esi, %ecx - mov %eax, %edx - jz L(ExitZero) - - and $63, %ecx - and $63, %edx - cmp $32, %ecx - ja L(StrlenCore7_1) - cmp $48, %edx - ja L(alignment_prolog) - - pxor %xmm0, %xmm0 - pxor %xmm4, %xmm4 - pxor %xmm7, %xmm7 - movdqu (%eax), %xmm1 - movdqu (%esi), %xmm5 - pcmpeqb %xmm1, %xmm0 - movdqu 16(%esi), %xmm6 - pmovmskb %xmm0, %ecx - pcmpeqb %xmm5, %xmm4 - pcmpeqb %xmm6, %xmm7 - test %ecx, %ecx - jnz L(exit_less16_) - mov %eax, %ecx - and $-16, %eax - jmp L(loop_prolog) - -L(alignment_prolog): - pxor %xmm0, %xmm0 - pxor %xmm4, %xmm4 - mov %edx, %ecx - pxor %xmm7, %xmm7 - and $15, %ecx - and $-16, %eax - pcmpeqb (%eax), %xmm0 - movdqu (%esi), %xmm5 - movdqu 16(%esi), %xmm6 - pmovmskb %xmm0, %edx - pcmpeqb %xmm5, %xmm4 - shr %cl, %edx - pcmpeqb %xmm6, %xmm7 - test %edx, %edx - jnz L(exit_less16) - add %eax, %ecx - - pxor %xmm0, %xmm0 -L(loop_prolog): - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - .p2align 4 -L(align16_loop): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 
64(%eax), %eax - test %edx, %edx - jz L(align16_loop) - bsf %edx, %edx - add %edx, %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit16): - bsf %edx, %edx - lea 16(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit32): - bsf %edx, %edx - lea 32(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit48): - bsf %edx, %edx - lea 48(%eax, %edx), %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit_less16): - bsf %edx, %edx - add %ecx, %eax - add %edx, %eax - jmp L(StartStrcpyPart) - - .p2align 4 -L(exit_less16_): - bsf %ecx, %ecx - add %ecx, %eax - - .p2align 4 -L(StartStrcpyPart): - pmovmskb %xmm4, %edx -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1) - - movdqu %xmm5, (%eax) - pmovmskb %xmm7, %edx -# ifdef USE_AS_STRNCAT - cmp $32, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes1) - - mov %esi, %ecx - and $-16, %esi - and $15, %ecx - pxor %xmm0, %xmm0 -# ifdef USE_AS_STRNCAT - add %ecx, %ebx - sbb %edx, %edx - or %edx, %ebx -# endif - sub %ecx, %eax - jmp L(Unalign16Both) - -L(StrlenCore7_1): - mov %eax, %ecx - pxor %xmm0, %xmm0 - and $15, %ecx - and $-16, %eax - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - shr %cl, %edx - test %edx, %edx - jnz L(exit_less16_1) - add %eax, %ecx - - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - - .p2align 4 -L(align16_loop_1): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16_1) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32_1) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48_1) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 64(%eax), %eax - test %edx, %edx - jz L(align16_loop_1) - bsf %edx, %edx - add %edx, %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit16_1): - bsf %edx, %edx - lea 16(%eax, %edx), %eax - 
jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit32_1): - bsf %edx, %edx - lea 32(%eax, %edx), %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit48_1): - bsf %edx, %edx - lea 48(%eax, %edx), %eax - jmp L(StartStrcpyPart_1) - - .p2align 4 -L(exit_less16_1): - bsf %edx, %edx - add %ecx, %eax - add %edx, %eax - - .p2align 4 -L(StartStrcpyPart_1): - mov %esi, %ecx - and $15, %ecx - and $-16, %esi - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - -# ifdef USE_AS_STRNCAT - cmp $48, %ebx - ja L(BigN) -# endif - pcmpeqb (%esi), %xmm1 -# ifdef USE_AS_STRNCAT - add %ecx, %ebx -# endif - pmovmskb %xmm1, %edx - shr %cl, %edx -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STRNCAT - cmp $32, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%eax) - sub %ecx, %eax - - .p2align 4 -L(Unalign16Both): - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $48, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) -L(Unalign16BothBigN): - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%eax, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm4 - movdqu %xmm3, (%eax, %ecx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm1 - movdqu %xmm4, (%eax, %ecx) - pcmpeqb %xmm1, %xmm0 - 
pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%eax, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx -# ifdef USE_AS_STRNCAT - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - movdqu %xmm3, (%eax, %ecx) - mov %esi, %edx - lea 16(%esi, %ecx), %esi - and $-0x40, %esi - sub %esi, %edx - sub %edx, %eax -# ifdef USE_AS_STRNCAT - lea 128(%ebx, %edx), %ebx -# endif - movaps (%esi), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%esi), %xmm5 - movaps 32(%esi), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%esi), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx -# ifdef USE_AS_STRNCAT - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) -# endif - test %edx, %edx - jnz L(Unaligned64Leave) - - .p2align 4 -L(Unaligned64Loop_start): - add $64, %eax - add $64, %esi - movdqu %xmm4, -64(%eax) - movaps (%esi), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%eax) - movaps 16(%esi), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%esi), %xmm3 - movdqu %xmm6, -32(%eax) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%eax) - movaps 48(%esi), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx -# ifdef USE_AS_STRNCAT - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) -# endif - test %edx, %edx - jz L(Unaligned64Loop_start) - -L(Unaligned64Leave): - pxor %xmm1, %xmm1 - - pcmpeqb %xmm4, %xmm0 - pcmpeqb %xmm5, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz 
L(CopyFrom1To16BytesUnaligned_0) - test %ecx, %ecx - jnz L(CopyFrom1To16BytesUnaligned_16) - - pcmpeqb %xmm6, %xmm0 - pcmpeqb %xmm7, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_32) - - bsf %ecx, %edx - movdqu %xmm4, (%eax) - movdqu %xmm5, 16(%eax) - movdqu %xmm6, 32(%eax) - add $48, %esi - add $48, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -# ifdef USE_AS_STRNCAT - .p2align 4 -L(BigN): - pcmpeqb (%esi), %xmm1 - pmovmskb %xmm1, %edx - shr %cl, %edx - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%eax) - sub %ecx, %eax - sub $48, %ebx - add %ecx, %ebx - - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%eax, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - jmp L(Unalign16BothBigN) -# endif - -/*------------end of main part-------------------------------*/ - -/* Case1 */ - .p2align 4 -L(CopyFrom1To16Bytes): - add %ecx, %eax - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTail): - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1): - add $16, %esi - add $16, %eax -L(CopyFrom1To16BytesTail1): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes): - bsf %edx, %edx - add %ecx, %esi - add $16, %edx - sub %ecx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_0): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_16): - bsf %ecx, %edx - movdqu %xmm4, (%eax) - add $16, %esi - add $16, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), 
%edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_32): - bsf %edx, %edx - movdqu %xmm4, (%eax) - movdqu %xmm5, 16(%eax) - add $32, %esi - add $32, %eax - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -# ifdef USE_AS_STRNCAT - - .p2align 4 -L(CopyFrom1To16BytesExit): - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -/* Case2 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %ecx, %eax - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - add $16, %edx - sub %ecx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -L(CopyFrom1To16BytesTailCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -L(CopyFrom1To16BytesTail1Case2): - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -/* Case2 or Case3, Case3 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesCase2) -L(CopyFrom1To16BytesCase3): - add $16, %ebx - add %ecx, %eax - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To32BytesCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTailCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTailCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1Case2OrCase3): - add $16, %eax - add $16, %esi - sub $16, %ebx -L(CopyFrom1To16BytesTail1Case2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1Case2) - 
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) - -# endif - -# ifdef USE_AS_STRNCAT - .p2align 4 -L(StrncatExit0): - movb %bh, (%eax) - mov STR3(%esp), %eax - RETURN -# endif - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit1): - movb %bh, 1(%eax) -# endif -L(Exit1): -# ifdef USE_AS_STRNCAT - movb (%esi), %dh -# endif - movb %dh, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit2): - movb %bh, 2(%eax) -# endif -L(Exit2): - movw (%esi), %dx - movw %dx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit3): - movb %bh, 3(%eax) -# endif -L(Exit3): - movw (%esi), %cx - movw %cx, (%eax) -# ifdef USE_AS_STRNCAT - movb 2(%esi), %dh -# endif - movb %dh, 2(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit4): - movb %bh, 4(%eax) -# endif -L(Exit4): - movl (%esi), %edx - movl %edx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit5): - movb %bh, 5(%eax) -# endif -L(Exit5): - movl (%esi), %ecx -# ifdef USE_AS_STRNCAT - movb 4(%esi), %dh -# endif - movb %dh, 4(%eax) - movl %ecx, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit6): - movb %bh, 6(%eax) -# endif -L(Exit6): - movl (%esi), %ecx - movw 4(%esi), %dx - movl %ecx, (%eax) - movw %dx, 4(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit7): - movb %bh, 7(%eax) -# endif -L(Exit7): - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%eax) - movl %edx, 3(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit8): - movb %bh, 8(%eax) -# endif -L(Exit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit9): - movb %bh, 9(%eax) -# endif -L(Exit9): - movlpd (%esi), %xmm0 -# ifdef USE_AS_STRNCAT - movb 8(%esi), %dh -# endif - movb %dh, 8(%eax) - movlpd 
%xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit10): - movb %bh, 10(%eax) -# endif -L(Exit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%eax) - movw %dx, 8(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit11): - movb %bh, 11(%eax) -# endif -L(Exit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%eax) - movl %edx, 7(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit12): - movb %bh, 12(%eax) -# endif -L(Exit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%eax) - movl %edx, 8(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit13): - movb %bh, 13(%eax) -# endif -L(Exit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 5(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit14): - movb %bh, 14(%eax) -# endif -L(Exit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 6(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit15): - movb %bh, 15(%eax) -# endif -L(Exit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%eax) - movlpd %xmm1, 7(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit16): - movb %bh, 16(%eax) -# endif -L(Exit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit17): - movb %bh, 17(%eax) -# endif -L(Exit17): - movdqu (%esi), %xmm0 -# ifdef USE_AS_STRNCAT - movb 16(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movb %dh, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit18): - movb %bh, 18(%eax) -# endif -L(Exit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%eax) - movw %cx, 
16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit19): - movb %bh, 19(%eax) -# endif -L(Exit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%eax) - movl %ecx, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit20): - movb %bh, 20(%eax) -# endif -L(Exit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%eax) - movl %ecx, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit21): - movb %bh, 21(%eax) -# endif -L(Exit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx -# ifdef USE_AS_STRNCAT - movb 20(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movl %ecx, 16(%eax) - movb %dh, 20(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit22): - movb %bh, 22(%eax) -# endif -L(Exit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%eax) - movlpd %xmm3, 14(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit23): - movb %bh, 23(%eax) -# endif -L(Exit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%eax) - movlpd %xmm3, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit24): - movb %bh, 24(%eax) -# endif -L(Exit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit25): - movb %bh, 25(%eax) -# endif -L(Exit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 -# ifdef USE_AS_STRNCAT - movb 24(%esi), %dh -# endif - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movb %dh, 24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit26): - movb %bh, 26(%eax) -# endif -L(Exit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movw %cx, 
24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit27): - movb %bh, 27(%eax) -# endif -L(Exit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movl %ecx, 23(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit28): - movb %bh, 28(%eax) -# endif -L(Exit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%eax) - movlpd %xmm2, 16(%eax) - movl %ecx, 24(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit29): - movb %bh, 29(%eax) -# endif -L(Exit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 13(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit30): - movb %bh, 30(%eax) -# endif -L(Exit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 14(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit31): - movb %bh, 31(%eax) -# endif -L(Exit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 15(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -# ifdef USE_AS_STRNCAT -L(StrncatExit32): - movb %bh, 32(%eax) -# endif -L(Exit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%eax) - movdqu %xmm2, 16(%eax) - mov STR3(%esp), %eax - RETURN - -# ifdef USE_AS_STRNCAT - - .p2align 4 -L(UnalignedLeaveCase2OrCase3): - test %edx, %edx - jnz L(Unaligned64LeaveCase2) -L(Unaligned64LeaveCase3): - lea 64(%ebx), %ecx - and $-16, %ecx - add $48, %ebx - jl L(CopyFrom1To16BytesCase3) - movdqu %xmm4, (%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm5, 16(%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm6, 32(%eax) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm7, 48(%eax) - xor %bh, %bh - movb 
%bh, 64(%eax) - mov STR3(%esp), %eax - RETURN - - .p2align 4 -L(Unaligned64LeaveCase2): - xor %ecx, %ecx - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm4, (%eax) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm5, 16(%eax) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm6, 32(%eax) - lea 16(%eax, %ecx), %eax - lea 16(%esi, %ecx), %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) -# endif - .p2align 4 -L(ExitZero): - RETURN - -END (STRCAT) - - .p2align 4 - .section .rodata -L(ExitTable): - .int JMPTBL(L(Exit1), L(ExitTable)) - .int JMPTBL(L(Exit2), L(ExitTable)) - .int JMPTBL(L(Exit3), L(ExitTable)) - .int JMPTBL(L(Exit4), L(ExitTable)) - .int JMPTBL(L(Exit5), L(ExitTable)) - .int JMPTBL(L(Exit6), L(ExitTable)) - .int JMPTBL(L(Exit7), L(ExitTable)) - .int JMPTBL(L(Exit8), L(ExitTable)) - .int JMPTBL(L(Exit9), L(ExitTable)) - .int JMPTBL(L(Exit10), L(ExitTable)) - .int JMPTBL(L(Exit11), L(ExitTable)) - .int JMPTBL(L(Exit12), L(ExitTable)) - .int JMPTBL(L(Exit13), L(ExitTable)) - .int JMPTBL(L(Exit14), L(ExitTable)) - .int JMPTBL(L(Exit15), L(ExitTable)) - .int JMPTBL(L(Exit16), L(ExitTable)) - .int JMPTBL(L(Exit17), L(ExitTable)) - .int JMPTBL(L(Exit18), L(ExitTable)) - .int JMPTBL(L(Exit19), L(ExitTable)) - .int JMPTBL(L(Exit20), L(ExitTable)) - .int JMPTBL(L(Exit21), L(ExitTable)) - .int JMPTBL(L(Exit22), L(ExitTable)) - .int JMPTBL(L(Exit23), L(ExitTable)) - .int JMPTBL(L(Exit24), L(ExitTable)) - .int JMPTBL(L(Exit25), L(ExitTable)) - .int JMPTBL(L(Exit26), L(ExitTable)) - 
.int JMPTBL(L(Exit27), L(ExitTable)) - .int JMPTBL(L(Exit28), L(ExitTable)) - .int JMPTBL(L(Exit29), L(ExitTable)) - .int JMPTBL(L(Exit30), L(ExitTable)) - .int JMPTBL(L(Exit31), L(ExitTable)) - .int JMPTBL(L(Exit32), L(ExitTable)) -# ifdef USE_AS_STRNCAT -L(ExitStrncatTable): - .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable)) - .int JMPTBL(L(StrncatExit32), 
L(ExitStrncatTable)) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/sysdeps/i386/i686/multiarch/strcat-ssse3.S deleted file mode 100644 index 59ffbc60a5..0000000000 --- a/sysdeps/i386/i686/multiarch/strcat-ssse3.S +++ /dev/null @@ -1,572 +0,0 @@ -/* strcat with SSSE3 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef STRCAT -# define STRCAT __strcat_ssse3 -# endif - -# define PARMS 4 -# define STR1 PARMS+4 -# define STR2 STR1+4 - -# ifdef USE_AS_STRNCAT -# define LEN STR2+8 -# endif - -# define USE_AS_STRCAT - -.text -ENTRY (STRCAT) - PUSH (%edi) - mov STR1(%esp), %edi - mov %edi, %edx - -# define RETURN jmp L(StartStrcpyPart) -# include "strlen-sse2.S" - -L(StartStrcpyPart): - mov STR2(%esp), %ecx - lea (%edi, %eax), %edx -# ifdef USE_AS_STRNCAT - PUSH (%ebx) - mov LEN(%esp), %ebx - test %ebx, %ebx - jz L(StrncatExit0) - cmp $8, %ebx - jbe L(StrncatExit8Bytes) -# endif - cmpb $0, (%ecx) - jz L(Exit1) - cmpb $0, 1(%ecx) - jz L(Exit2) - cmpb $0, 2(%ecx) - jz L(Exit3) - cmpb $0, 3(%ecx) - jz L(Exit4) - cmpb $0, 4(%ecx) - jz L(Exit5) - cmpb $0, 5(%ecx) - jz L(Exit6) - cmpb $0, 6(%ecx) - jz L(Exit7) - cmpb $0, 7(%ecx) - jz L(Exit8) - cmpb $0, 8(%ecx) - jz L(Exit9) -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - jb L(StrncatExit15Bytes) -# endif - cmpb $0, 9(%ecx) - jz L(Exit10) - cmpb $0, 10(%ecx) - jz L(Exit11) - cmpb $0, 11(%ecx) - jz L(Exit12) - cmpb $0, 12(%ecx) - jz L(Exit13) - cmpb $0, 13(%ecx) - jz L(Exit14) - cmpb $0, 14(%ecx) - jz L(Exit15) - cmpb $0, 15(%ecx) - jz L(Exit16) -# ifdef USE_AS_STRNCAT - cmp $16, %ebx - je L(StrncatExit16) - -# define RETURN1 \ - POP (%ebx); \ - POP (%edi); \ - ret; \ - CFI_PUSH (%ebx); \ - CFI_PUSH (%edi) -# define USE_AS_STRNCPY -# else -# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi) -# endif -# include "strcpy-ssse3.S" - .p2align 4 -L(CopyFrom1To16Bytes): - add %esi, %edx - add %esi, %ecx - - POP (%esi) - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) 
- test $0x08, %al - jnz L(Exit4) - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(ExitHigh): - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - test $0x08, %ah - jnz L(Exit12) - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - movlpd (%ecx), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit1): - movb %bh, 1(%edx) -L(Exit1): - movb (%ecx), %al - movb %al, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit2): - movb %bh, 2(%edx) -L(Exit2): - movw (%ecx), %ax - movw %ax, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit3): - movb %bh, 3(%edx) -L(Exit3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit4): - movb %bh, 4(%edx) -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit5): - movb %bh, 5(%edx) -L(Exit5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit6): - movb %bh, 6(%edx) -L(Exit6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit7): - movb %bh, 7(%edx) -L(Exit7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit8): - movb %bh, 8(%edx) -L(Exit8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit9): - movb %bh, 9(%edx) -L(Exit9): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movb 8(%ecx), %al - movb %al, 8(%edx) - movl %edi, %eax - RETURN1 - - 
.p2align 4 -L(StrncatExit10): - movb %bh, 10(%edx) -L(Exit10): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movw 8(%ecx), %ax - movw %ax, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit11): - movb %bh, 11(%edx) -L(Exit11): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 7(%ecx), %eax - movl %eax, 7(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit12): - movb %bh, 12(%edx) -L(Exit12): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit13): - movb %bh, 13(%edx) -L(Exit13): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 5(%ecx), %xmm0 - movlpd %xmm0, 5(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit14): - movb %bh, 14(%edx) -L(Exit14): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 6(%ecx), %xmm0 - movlpd %xmm0, 6(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit15): - movb %bh, 15(%edx) -L(Exit15): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit16): - movb %bh, 16(%edx) -L(Exit16): - movlpd (%ecx), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) - movl %edi, %eax - RETURN1 - -# ifdef USE_AS_STRNCPY - - CFI_PUSH(%esi) - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %esi, %ecx - lea (%esi, %edx), %esi - lea -9(%ebx), %edx - and $1<<7, %dh - or %al, %dh - test %dh, %dh - lea (%esi), %edx - POP (%esi) - jz L(ExitHighCase2) - - test $0x01, %al - jnz L(Exit1) - cmp $1, %ebx - je L(StrncatExit1) - test $0x02, %al - jnz L(Exit2) - cmp $2, %ebx - je L(StrncatExit2) - test $0x04, %al - jnz L(Exit3) - cmp $3, %ebx - je L(StrncatExit3) - test $0x08, %al - jnz L(Exit4) - cmp $4, %ebx - je L(StrncatExit4) - test $0x10, %al - jnz L(Exit5) - cmp $5, %ebx - je L(StrncatExit5) - test $0x20, %al - jnz L(Exit6) - cmp $6, %ebx - je L(StrncatExit6) - test $0x40, 
%al - jnz L(Exit7) - cmp $7, %ebx - je L(StrncatExit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - lea 7(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax - xor %cl, %cl - movb %cl, (%eax) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(ExitHighCase2): - test $0x01, %ah - jnz L(Exit9) - cmp $9, %ebx - je L(StrncatExit9) - test $0x02, %ah - jnz L(Exit10) - cmp $10, %ebx - je L(StrncatExit10) - test $0x04, %ah - jnz L(Exit11) - cmp $11, %ebx - je L(StrncatExit11) - test $0x8, %ah - jnz L(Exit12) - cmp $12, %ebx - je L(StrncatExit12) - test $0x10, %ah - jnz L(Exit13) - cmp $13, %ebx - je L(StrncatExit13) - test $0x20, %ah - jnz L(Exit14) - cmp $14, %ebx - je L(StrncatExit14) - test $0x40, %ah - jnz L(Exit15) - cmp $15, %ebx - je L(StrncatExit15) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm1 - movlpd %xmm1, 8(%edx) - movl %edi, %eax - RETURN1 - - CFI_PUSH(%esi) - -L(CopyFrom1To16BytesCase2OrCase3): - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - - .p2align 4 -L(CopyFrom1To16BytesCase3): - add $16, %ebx - add %esi, %edx - add %esi, %ecx - - POP (%esi) - - cmp $8, %ebx - ja L(ExitHighCase3) - cmp $1, %ebx - je L(StrncatExit1) - cmp $2, %ebx - je L(StrncatExit2) - cmp $3, %ebx - je L(StrncatExit3) - cmp $4, %ebx - je L(StrncatExit4) - cmp $5, %ebx - je L(StrncatExit5) - cmp $6, %ebx - je L(StrncatExit6) - cmp $7, %ebx - je L(StrncatExit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movb %bh, 8(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(ExitHighCase3): - cmp $9, %ebx - je L(StrncatExit9) - cmp $10, %ebx - je L(StrncatExit10) - cmp $11, %ebx - je L(StrncatExit11) - cmp $12, %ebx - je L(StrncatExit12) - cmp $13, %ebx - je L(StrncatExit13) - cmp $14, %ebx - je L(StrncatExit14) - cmp $15, %ebx - je L(StrncatExit15) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm1 - movlpd %xmm1, 8(%edx) - movb %bh, 16(%edx) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit0): - movl %edi, %eax - RETURN1 - - .p2align 4 
-L(StrncatExit15Bytes): - cmp $9, %ebx - je L(StrncatExit9) - cmpb $0, 9(%ecx) - jz L(Exit10) - cmp $10, %ebx - je L(StrncatExit10) - cmpb $0, 10(%ecx) - jz L(Exit11) - cmp $11, %ebx - je L(StrncatExit11) - cmpb $0, 11(%ecx) - jz L(Exit12) - cmp $12, %ebx - je L(StrncatExit12) - cmpb $0, 12(%ecx) - jz L(Exit13) - cmp $13, %ebx - je L(StrncatExit13) - cmpb $0, 13(%ecx) - jz L(Exit14) - cmp $14, %ebx - je L(StrncatExit14) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) - lea 14(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax - movb %bh, (%eax) - movl %edi, %eax - RETURN1 - - .p2align 4 -L(StrncatExit8Bytes): - cmpb $0, (%ecx) - jz L(Exit1) - cmp $1, %ebx - je L(StrncatExit1) - cmpb $0, 1(%ecx) - jz L(Exit2) - cmp $2, %ebx - je L(StrncatExit2) - cmpb $0, 2(%ecx) - jz L(Exit3) - cmp $3, %ebx - je L(StrncatExit3) - cmpb $0, 3(%ecx) - jz L(Exit4) - cmp $4, %ebx - je L(StrncatExit4) - cmpb $0, 4(%ecx) - jz L(Exit5) - cmp $5, %ebx - je L(StrncatExit5) - cmpb $0, 5(%ecx) - jz L(Exit6) - cmp $6, %ebx - je L(StrncatExit6) - cmpb $0, 6(%ecx) - jz L(Exit7) - cmp $7, %ebx - je L(StrncatExit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - lea 7(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax - movb %bh, (%eax) - movl %edi, %eax - RETURN1 - -# endif -END (STRCAT) -#endif diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S deleted file mode 100644 index 8412cb6f23..0000000000 --- a/sysdeps/i386/i686/multiarch/strcat.S +++ /dev/null @@ -1,92 +0,0 @@ -/* Multiple versions of strcat - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#ifndef USE_AS_STRNCAT -# ifndef STRCAT -# define STRCAT strcat -# endif -#endif - -#ifdef USE_AS_STRNCAT -# define STRCAT_SSSE3 __strncat_ssse3 -# define STRCAT_SSE2 __strncat_sse2 -# define STRCAT_IA32 __strncat_ia32 -# define __GI_STRCAT __GI_strncat -#else -# define STRCAT_SSSE3 __strcat_ssse3 -# define STRCAT_SSE2 __strcat_sse2 -# define STRCAT_IA32 __strcat_ia32 -# define __GI_STRCAT __GI_strcat -#endif - - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strncat in static library since we - need strncat before the initialization happened. 
*/ -#if IS_IN (libc) - - .text -ENTRY(STRCAT) - .type STRCAT, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (STRCAT_IA32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (STRCAT_SSE2) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (STRCAT_SSSE3) -2: ret -END(STRCAT) - -# undef ENTRY -# define ENTRY(name) \ - .type STRCAT_IA32, @function; \ - .align 16; \ - .globl STRCAT_IA32; \ - .hidden STRCAT_IA32; \ - STRCAT_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal strcat calls through a PLT. - The speedup we get from using SSSE3 instruction is likely eaten away - by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32 -# undef libc_hidden_def -# define libc_hidden_def(name) \ - .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32 - -# endif -#endif - -#ifndef USE_AS_STRNCAT -# include "../../strcat.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S deleted file mode 100644 index 95fd7c084e..0000000000 --- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S +++ /dev/null @@ -1,158 +0,0 @@ -/* strchr with SSE2 with bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi) -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - .text -ENTRY (__strchr_sse2_bsf) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $15, %ecx - pshufd $0, %xmm1, %xmm1 - je L(loop) - -/* Handle unaligned string. */ - and $-16, %edi - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - sarl %cl, %edx - sarl %cl, %eax - test %eax, %eax - je L(unaligned_no_match) - /* Check which byte is a match. */ - bsf %eax, %eax - /* Is there a NULL? */ - test %edx, %edx - je L(unaligned_match) - bsf %edx, %edx - cmpl %edx, %eax - /* Return NULL if NULL comes first. */ - ja L(return_null) -L(unaligned_match): - add %edi, %eax - add %ecx, %eax - RETURN - - .p2align 4 -L(unaligned_no_match): - test %edx, %edx - jne L(return_null) - pxor %xmm2, %xmm2 - - add $16, %edi - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - jmp L(loop) - -L(matches): - pmovmskb %xmm2, %edx - test %eax, %eax - jz L(return_null) - bsf %eax, %eax - /* There is a match. First find where NULL is. */ - test %edx, %edx - je L(match) - bsf %edx, %ecx - /* Check if NULL comes first. */ - cmpl %ecx, %eax - ja L(return_null) -L(match): - sub $16, %edi - add %edi, %eax - RETURN - -/* Return NULL. */ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - -END (__strchr_sse2_bsf) -#endif diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2.S b/sysdeps/i386/i686/multiarch/strchr-sse2.S deleted file mode 100644 index 1f9e875b04..0000000000 --- a/sysdeps/i386/i686/multiarch/strchr-sse2.S +++ /dev/null @@ -1,348 +0,0 @@ -/* strchr SSE2 without bsf - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi) -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__strchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $15, %ecx - pshufd $0, %xmm1, %xmm1 - je L(loop) - -/* Handle unaligned string. */ - and $-16, %edi - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - sarl %cl, %edx - sarl %cl, %eax - test %eax, %eax - jz L(unaligned_no_match) - /* Check which byte is a match. */ - /* Is there a NULL? */ - add %ecx, %edi - test %edx, %edx - jz L(match_case1) - jmp L(match_case2) - - .p2align 4 -L(unaligned_no_match): - test %edx, %edx - jne L(return_null) - - pxor %xmm2, %xmm2 - add $16, %edi - - .p2align 4 -/* Loop start on aligned string. 
*/ -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(matches) - test %edx, %edx - jnz L(return_null) - add $16, %edi - jmp L(loop) - -L(matches): - /* There is a match. First find where NULL is. */ - test %edx, %edx - jz L(match_case1) - - .p2align 4 -L(match_case2): - test %al, %al - jz L(match_higth_case2) - - mov %al, %cl - and $15, %cl - jnz L(match_case2_4) - - mov %dl, %ch - and $15, %ch - jnz L(return_null) - - test $0x10, %al - jnz L(Exit5) - test $0x10, %dl - jnz L(return_null) - test $0x20, %al - jnz L(Exit6) - test $0x20, %dl - jnz L(return_null) - test $0x40, %al - jnz L(Exit7) - test $0x40, %dl - jnz L(return_null) - lea 7(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_4): - test $0x01, %al - jnz L(Exit1) - test $0x01, %dl - jnz L(return_null) - test $0x02, %al - jnz L(Exit2) - test $0x02, %dl - jnz L(return_null) - test $0x04, %al - jnz L(Exit3) - test $0x04, %dl - jnz L(return_null) - lea 3(%edi), %eax - RETURN - - .p2align 4 -L(match_higth_case2): - test %dl, %dl - jnz L(return_null) - - mov %ah, %cl - and $15, %cl - jnz L(match_case2_12) - - mov %dh, %ch - and $15, %ch - jnz L(return_null) - - test $0x10, %ah - jnz L(Exit13) - test $0x10, %dh - jnz L(return_null) - test $0x20, %ah - jnz L(Exit14) - test $0x20, %dh - jnz L(return_null) - 
test $0x40, %ah - jnz L(Exit15) - test $0x40, %dh - jnz L(return_null) - lea 15(%edi), %eax - RETURN - - .p2align 4 -L(match_case2_12): - test $0x01, %ah - jnz L(Exit9) - test $0x01, %dh - jnz L(return_null) - test $0x02, %ah - jnz L(Exit10) - test $0x02, %dh - jnz L(return_null) - test $0x04, %ah - jnz L(Exit11) - test $0x04, %dh - jnz L(return_null) - lea 11(%edi), %eax - RETURN - - .p2align 4 -L(match_case1): - test %al, %al - jz L(match_higth_case1) - - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) - test $0x08, %al - jnz L(Exit4) - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - lea 7(%edi), %eax - RETURN - - .p2align 4 -L(match_higth_case1): - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - test $0x08, %ah - jnz L(Exit12) - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - lea 15(%edi), %eax - RETURN - - .p2align 4 -L(Exit1): - lea (%edi), %eax - RETURN - - .p2align 4 -L(Exit2): - lea 1(%edi), %eax - RETURN - - .p2align 4 -L(Exit3): - lea 2(%edi), %eax - RETURN - - .p2align 4 -L(Exit4): - lea 3(%edi), %eax - RETURN - - .p2align 4 -L(Exit5): - lea 4(%edi), %eax - RETURN - - .p2align 4 -L(Exit6): - lea 5(%edi), %eax - RETURN - - .p2align 4 -L(Exit7): - lea 6(%edi), %eax - RETURN - - .p2align 4 -L(Exit9): - lea 8(%edi), %eax - RETURN - - .p2align 4 -L(Exit10): - lea 9(%edi), %eax - RETURN - - .p2align 4 -L(Exit11): - lea 10(%edi), %eax - RETURN - - .p2align 4 -L(Exit12): - lea 11(%edi), %eax - RETURN - - .p2align 4 -L(Exit13): - lea 12(%edi), %eax - RETURN - - .p2align 4 -L(Exit14): - lea 13(%edi), %eax - RETURN - - .p2align 4 -L(Exit15): - lea 14(%edi), %eax - RETURN - -/* Return NULL. 
*/ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - -END (__strchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/strchr.S b/sysdeps/i386/i686/multiarch/strchr.S deleted file mode 100644 index 5b97b1c767..0000000000 --- a/sysdeps/i386/i686/multiarch/strchr.S +++ /dev/null @@ -1,57 +0,0 @@ -/* Multiple versions of strchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(strchr) - .type strchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strchr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strchr_sse2_bsf) - HAS_ARCH_FEATURE (Slow_BSF) - jz 2f - LOAD_FUNC_GOT_EAX (__strchr_sse2) -2: ret -END(strchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __strchr_ia32, @function; \ - .globl __strchr_ia32; \ - .p2align 4; \ - __strchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strchr_ia32, .-__strchr_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strchr; __GI_strchr = __strchr_ia32 -#endif - -#include "../../i586/strchr.S" diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S deleted file mode 100644 index cd26058671..0000000000 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ /dev/null @@ -1,804 +0,0 @@ -/* strcmp with SSE4.2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_STRNCMP -# ifndef STRCMP -# define STRCMP __strncmp_sse4_2 -# endif -# define STR1 8 -# define STR2 STR1+4 -# define CNT STR2+4 -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# define REM %ebp -#elif defined USE_AS_STRCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strcasecmp_l_sse4_2 -# endif -# ifdef PIC -# define STR1 12 -# else -# define STR1 8 -# endif -# define STR2 STR1+4 -# define LOCALE 12 /* Loaded before the adjustment. */ -# ifdef PIC -# define RETURN POP (%edi); POP (%ebx); ret; \ - .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi) -# else -# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi) -# endif -# define NONASCII __strcasecmp_nonascii -#elif defined USE_AS_STRNCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strncasecmp_l_sse4_2 -# endif -# ifdef PIC -# define STR1 16 -# else -# define STR1 12 -# endif -# define STR2 STR1+4 -# define CNT STR2+4 -# define LOCALE 16 /* Loaded before the adjustment. 
*/ -# ifdef PIC -# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \ - .p2align 4; \ - CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi) -# else -# define RETURN POP (%edi); POP (REM); ret; \ - .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi) -# endif -# define REM %ebp -# define NONASCII __strncasecmp_nonascii -#else -# ifndef STRCMP -# define STRCMP __strcmp_sse4_2 -# endif -# define STR1 4 -# define STR2 STR1+4 -# define RETURN ret; .p2align 4 -#endif - - .section .text.sse4.2,"ax",@progbits - -#ifdef USE_AS_STRCASECMP_L -ENTRY (__strcasecmp_sse4_2) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strcasecmp_nonascii -# else - jne __strcasecmp_nonascii - jmp L(ascii) -# endif -END (__strcasecmp_sse4_2) -#endif - -#ifdef USE_AS_STRNCASECMP_L -ENTRY (__strncasecmp_sse4_2) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp 
__strncasecmp_nonascii -# else - jne __strncasecmp_nonascii - jmp L(ascii) -# endif -END (__strncasecmp_sse4_2) -#endif - - ENTRY (STRCMP) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movl LOCALE(%esp), %eax -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) - jne NONASCII - -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) -# endif -L(ascii): - .section .rodata.cst16,"aM",@progbits,16 - .align 16 -.Lbelowupper: - .quad 0x4040404040404040 - .quad 0x4040404040404040 -.Ltopupper: - .quad 0x5b5b5b5b5b5b5b5b - .quad 0x5b5b5b5b5b5b5b5b -.Ltouppermask: - .quad 0x2020202020202020 - .quad 0x2020202020202020 - .previous - -# ifdef PIC -# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) -# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) -# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) -# else -# define UCLOW_reg .Lbelowupper -# define UCHIGH_reg .Ltopupper -# define LCQWORD_reg .Ltouppermask -# endif -#endif - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - PUSH (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - PUSH (%edi) -#endif - mov STR1(%esp), %edx - mov STR2(%esp), %eax -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - movl CNT(%esp), REM - test REM, REM - je L(eq) -#endif - mov %dx, %cx - and $0xfff, %cx - cmp $0xff0, %cx - ja L(first4bytes) - movdqu (%edx), %xmm2 - mov %eax, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(first4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ - movdqa reg1, %xmm3; \ - movdqa UCHIGH_reg, %xmm4; \ - movdqa reg2, %xmm5; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb UCLOW_reg, %xmm3; \ - pcmpgtb reg1, %xmm4; \ - pcmpgtb UCLOW_reg, %xmm5; \ - pcmpgtb reg2, %xmm6; \ - pand %xmm4, %xmm3; \ - pand %xmm6, %xmm5; \ - pand LCQWORD_reg, %xmm3; \ - pand LCQWORD_reg, %xmm5; \ - 
por %xmm3, reg1; \ - por %xmm5, reg2 - - movdqu (%eax), %xmm1 - TOLOWER (%xmm2, %xmm1) - movd %xmm2, %ecx - movd %xmm1, %edi - movdqa %xmm2, %xmm3 - movdqa %xmm1, %xmm4 - cmpl %edi, %ecx -#else -# define TOLOWER(reg1, reg) - - movd %xmm2, %ecx - cmp (%eax), %ecx -#endif - jne L(less4bytes) -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - movdqu (%eax), %xmm1 -#endif - pxor %xmm2, %xmm1 - pxor %xmm0, %xmm0 - ptest %xmm1, %xmm0 - jnc L(less16bytes) - pcmpeqb %xmm0, %xmm2 - ptest %xmm2, %xmm0 - jnc L(less16bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $16, REM - jbe L(eq) -#endif - add $16, %edx - add $16, %eax -L(first4bytes): - movzbl (%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl (%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, (%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - je L(eq) -#endif - - movzbl 1(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 1(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 1(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - je L(eq) -#endif - movzbl 2(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 2(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 2(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - je L(eq) -#endif - movzbl 3(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 3(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 3(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - je L(eq) -#endif - movzbl 4(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 4(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 4(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - je L(eq) -#endif - movzbl 5(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 5(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 5(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp 
$6, REM - je L(eq) -#endif - movzbl 6(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 6(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 6(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - je L(eq) -#endif - movzbl 7(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 7(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 7(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $8, REM - je L(eq) -#endif - add $8, %eax - add $8, %edx - -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - PUSH (%edi) -#endif - PUSH (%esi) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_remember_state -#endif - mov %edx, %edi - mov %eax, %esi - xorl %eax, %eax -L(check_offset): - movl %edi, %edx - movl %esi, %ecx - andl $0xfff, %edx - andl $0xfff, %ecx - cmpl %edx, %ecx - cmovl %edx, %ecx - lea -0xff0(%ecx), %edx - sub %edx, %edi - sub %edx, %esi - testl %edx, %edx - jg L(crosspage) -L(loop): - movdqu (%esi,%edx), %xmm2 - movdqu (%edi,%edx), %xmm1 - TOLOWER (%xmm2, %xmm1) - pcmpistri $0x1a, %xmm2, %xmm1 - jbe L(end) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $16, REM - jbe L(more16byteseq) -#endif - - add $16, %edx - jle L(loop) -L(crosspage): - movzbl (%edi,%edx), %eax - movzbl (%esi,%edx), %ecx -#if defined 
USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx -# endif -#endif - subl %ecx, %eax - jne L(ret) - testl %ecx, %ecx - je L(ret) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $1, REM - jbe L(more16byteseq) -#endif - inc %edx - cmp $15, %edx - jle L(crosspage) - add %edx, %edi - add %edx, %esi - jmp L(check_offset) - - .p2align 4 -L(end): - jnc L(ret) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub %ecx, REM - jbe L(more16byteseq) -#endif - lea (%ecx,%edx), %ecx - movzbl (%edi,%ecx), %eax - movzbl (%esi,%ecx), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx -# endif -#endif - subl %ecx, %eax -L(ret): - POP (%esi) - POP (%edi) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - ret - - .p2align 4 -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_restore_state -L(more16byteseq): - POP (%esi) -# ifdef USE_AS_STRNCMP - POP (%edi) -# endif -#endif -L(eq): - xorl %eax, %eax - RETURN - -L(neq): - mov $1, %eax - ja L(neq_bigger) - neg %eax -L(neq_bigger): - RETURN - -L(less16bytes): - add $0xfefefeff, %ecx - jnc L(less4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movd %xmm3, %edi - xor %edi, %ecx -#else - xor (%edx), %ecx -#endif - or $0xfefefeff, %ecx - add $1, %ecx - jnz L(less4bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - 
cmp $4, REM - jbe L(eq) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - psrldq $4, %xmm3 - psrldq $4, %xmm4 - movd %xmm3, %ecx - movd %xmm4, %edi - cmp %edi, %ecx - mov %ecx, %edi -#else - mov 4(%edx), %ecx - cmp 4(%eax), %ecx -#endif - jne L(more4bytes) - add $0xfefefeff, %ecx - jnc L(more4bytes) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - xor %edi, %ecx -#else - xor 4(%edx), %ecx -#endif - or $0xfefefeff, %ecx - add $1, %ecx - jnz L(more4bytes) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - sub $8, REM - jbe L(eq) -#endif - - add $8, %edx - add $8, %eax -L(less4bytes): - - movzbl (%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl (%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, (%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - je L(eq) -#endif - movzbl 1(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 1(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 1(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - je L(eq) -#endif - - movzbl 2(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 2(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), 
%edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 2(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - je L(eq) -#endif - movzbl 3(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 3(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 3(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -L(more4bytes): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - je L(eq) -#endif - movzbl 4(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 4(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 4(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - je L(eq) -#endif - movzbl 5(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 5(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 5(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - je L(eq) -#endif - 
movzbl 6(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 6(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 6(%edx) -#endif - jne L(neq) - cmpl $0, %ecx - je L(eq) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - je L(eq) -#endif - movzbl 7(%eax), %ecx -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movzbl 7(%edx), %edi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi -# endif - cmpl %ecx, %edi -#else - cmpb %cl, 7(%edx) -#endif - jne L(neq) - jmp L(eq) - -END (STRCMP) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S deleted file mode 100644 index b25cc3e068..0000000000 --- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S +++ /dev/null @@ -1,2810 +0,0 @@ -/* strcmp with SSSE3 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -#include <sysdep.h> -#include "asm-syntax.h" - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) - -#ifdef USE_AS_STRNCMP -# ifndef STRCMP -# define STRCMP __strncmp_ssse3 -# endif -# define STR1 8 -# define STR2 STR1+4 -# define CNT STR2+4 -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# define UPDATE_STRNCMP_COUNTER \ - /* calculate left number to compare */ \ - mov $16, %esi; \ - sub %ecx, %esi; \ - cmp %esi, REM; \ - jbe L(more8byteseq); \ - sub %esi, REM -# define FLAGS %ebx -# define REM %ebp -#elif defined USE_AS_STRCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strcasecmp_l_ssse3 -# endif -# ifdef PIC -# define STR1 8 -# else -# define STR1 4 -# endif -# define STR2 STR1+4 -# define LOCALE 12 /* Loaded before the adjustment. */ -# ifdef PIC -# define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx) -# else -# define RETURN ret; .p2align 4 -# endif -# define UPDATE_STRNCMP_COUNTER -# define FLAGS (%esp) -# define NONASCII __strcasecmp_nonascii -#elif defined USE_AS_STRNCASECMP_L -# include "locale-defines.h" -# ifndef STRCMP -# define STRCMP __strncasecmp_l_ssse3 -# endif -# ifdef PIC -# define STR1 12 -# else -# define STR1 8 -# endif -# define STR2 STR1+4 -# define CNT STR2+4 -# define LOCALE 16 /* Loaded before the adjustment. 
*/ -# ifdef PIC -# define RETURN POP (REM); POP (%ebx); ret; \ - .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM) -# else -# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM) -# endif -# define UPDATE_STRNCMP_COUNTER \ - /* calculate left number to compare */ \ - mov $16, %esi; \ - sub %ecx, %esi; \ - cmp %esi, REM; \ - jbe L(more8byteseq); \ - sub %esi, REM -# define FLAGS (%esp) -# define REM %ebp -# define NONASCII __strncasecmp_nonascii -#else -# ifndef STRCMP -# define STRCMP __strcmp_ssse3 -# endif -# define STR1 4 -# define STR2 STR1+4 -# define RETURN ret; .p2align 4 -# define UPDATE_STRNCMP_COUNTER -# define FLAGS %ebx -#endif - - .section .text.ssse3,"ax",@progbits - -#ifdef USE_AS_STRCASECMP_L -ENTRY (__strcasecmp_ssse3) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strcasecmp_nonascii -# else - jne __strcasecmp_nonascii - jmp L(ascii) -# endif -END (__strcasecmp_ssse3) -#endif - -#ifdef USE_AS_STRNCASECMP_L -ENTRY (__strncasecmp_ssse3) -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) - movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax -# ifdef NO_TLS_DIRECT_SEG_REFS - addl %gs:0, %eax - movl (%eax), %eax -# else - movl %gs:(%eax), %eax -# endif -# else -# ifdef NO_TLS_DIRECT_SEG_REFS - movl %gs:0, %eax - movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax -# else - movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax -# endif -# endif -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl 
LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) -# ifdef PIC - je L(ascii) - POP (%ebx) - jmp __strncasecmp_nonascii -# else - jne __strncasecmp_nonascii - jmp L(ascii) -# endif -END (__strncasecmp_ssse3) -#endif - -ENTRY (STRCMP) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movl LOCALE(%esp), %eax -# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 - movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax -# else - movl (%eax), %eax -# endif - testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) - jne NONASCII - -# ifdef PIC - PUSH (%ebx) - LOAD_PIC_REG(bx) -# endif -L(ascii): - .section .rodata.cst16,"aM",@progbits,16 - .align 16 -.Lbelowupper: - .quad 0x4040404040404040 - .quad 0x4040404040404040 -.Ltopupper: - .quad 0x5b5b5b5b5b5b5b5b - .quad 0x5b5b5b5b5b5b5b5b -.Ltouppermask: - .quad 0x2020202020202020 - .quad 0x2020202020202020 - .previous - -# ifdef PIC -# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx) -# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx) -# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx) -# else -# define UCLOW_reg .Lbelowupper -# define UCHIGH_reg .Ltopupper -# define LCQWORD_reg .Ltouppermask -# endif -#endif - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - PUSH (REM) -#endif - - movl STR1(%esp), %edx - movl STR2(%esp), %eax -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - movl CNT(%esp), REM - cmp $16, REM - jb L(less16bytes_sncmp) -#elif !defined USE_AS_STRCASECMP_L - movzbl (%eax), %ecx - cmpb %cl, (%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 1(%eax), %ecx - cmpb %cl, 1(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 2(%eax), %ecx - cmpb %cl, 2(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 3(%eax), %ecx - cmpb %cl, 3(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 4(%eax), %ecx - cmpb %cl, 4(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 
5(%eax), %ecx - cmpb %cl, 5(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 6(%eax), %ecx - cmpb %cl, 6(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - movzbl 7(%eax), %ecx - cmpb %cl, 7(%edx) - jne L(neq) - cmpl $0, %ecx - je L(eq) - - add $8, %edx - add $8, %eax -#endif - movl %edx, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(crosspage) - mov %eax, %ecx - and $0xfff, %ecx - cmp $0xff0, %ecx - ja L(crosspage) - pxor %xmm0, %xmm0 - movlpd (%eax), %xmm1 - movlpd (%edx), %xmm2 - movhpd 8(%eax), %xmm1 - movhpd 8(%edx), %xmm2 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# define TOLOWER(reg1, reg2) \ - movdqa reg1, %xmm5; \ - movdqa reg2, %xmm7; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb UCLOW_reg, %xmm5; \ - pcmpgtb UCLOW_reg, %xmm7; \ - pcmpgtb reg1, %xmm6; \ - pand %xmm6, %xmm5; \ - movdqa UCHIGH_reg, %xmm6; \ - pcmpgtb reg2, %xmm6; \ - pand %xmm6, %xmm7; \ - pand LCQWORD_reg, %xmm5; \ - por %xmm5, reg1; \ - pand LCQWORD_reg, %xmm7; \ - por %xmm7, reg2 - TOLOWER (%xmm1, %xmm2) -#else -# define TOLOWER(reg1, reg2) -#endif - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %ecx - sub $0xffff, %ecx - jnz L(less16bytes) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(eq) -#endif - add $16, %eax - add $16, %edx - -L(crosspage): - -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - PUSH (FLAGS) -#endif - PUSH (%edi) - PUSH (%esi) -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - pushl $0 - cfi_adjust_cfa_offset (4) -#endif -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cfi_remember_state -#endif - - movl %edx, %edi - movl %eax, %ecx - and $0xf, %ecx - and $0xf, %edi - xor %ecx, %eax - xor %edi, %edx -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - xor FLAGS, FLAGS -#endif - cmp %edi, %ecx - je L(ashr_0) - ja L(bigger) - orl $0x20, FLAGS - xchg %edx, %eax - xchg %ecx, %edi 
-L(bigger): - lea 15(%edi), %edi - sub %ecx, %edi - cmp $8, %edi - jle L(ashr_less_8) - cmp $14, %edi - je L(ashr_15) - cmp $13, %edi - je L(ashr_14) - cmp $12, %edi - je L(ashr_13) - cmp $11, %edi - je L(ashr_12) - cmp $10, %edi - je L(ashr_11) - cmp $9, %edi - je L(ashr_10) -L(ashr_less_8): - je L(ashr_9) - cmp $7, %edi - je L(ashr_8) - cmp $6, %edi - je L(ashr_7) - cmp $5, %edi - je L(ashr_6) - cmp $4, %edi - je L(ashr_5) - cmp $3, %edi - je L(ashr_4) - cmp $2, %edi - je L(ashr_3) - cmp $1, %edi - je L(ashr_2) - cmp $0, %edi - je L(ashr_1) - -/* - * The following cases will be handled by ashr_0 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(0~15) n(0~15) 15(15+ n-n) ashr_0 - */ - .p2align 4 -L(ashr_0): - mov $0xffff, %esi - movdqa (%eax), %xmm1 - pxor %xmm0, %xmm0 - pcmpeqb %xmm1, %xmm0 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movdqa (%edx), %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm2, %xmm1 -#else - pcmpeqb (%edx), %xmm1 -#endif - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - mov %ecx, %edi - jne L(less32bytes) - UPDATE_STRNCMP_COUNTER - movl $0x10, FLAGS - mov $0x10, %ecx - pxor %xmm0, %xmm0 - .p2align 4 -L(loop_ashr_0): - movdqa (%eax, %ecx), %xmm1 -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - movdqa (%edx, %ecx), %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 -#else - pcmpeqb %xmm1, %xmm0 - pcmpeqb (%edx, %ecx), %xmm1 -#endif - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - jmp L(loop_ashr_0) - -/* - * The following cases will be handled by ashr_1 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(15) n -15 0(15 +(n-15) - n) ashr_1 - */ - .p2align 4 -L(ashr_1): - mov $0xffff, %esi - pxor %xmm0, 
%xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $15, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -15(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $1, FLAGS - lea 1(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_1): - add $16, %edi - jg L(nibble_ashr_1) - -L(gobble_ashr_1): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $1, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_1) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $1, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_1) - - .p2align 4 -L(nibble_ashr_1): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfffe, %esi - jnz L(ashr_1_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $15, REM - jbe L(ashr_1_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_1) - - .p2align 4 -L(ashr_1_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $1, %xmm0 - psrldq $1, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_2 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case 
- * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 - */ - .p2align 4 -L(ashr_2): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $14, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -14(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $2, FLAGS - lea 2(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_2): - add $16, %edi - jg L(nibble_ashr_2) - -L(gobble_ashr_2): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $2, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_2) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $2, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_2) - - .p2align 4 -L(nibble_ashr_2): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfffc, %esi - jnz L(ashr_2_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $14, REM - jbe L(ashr_2_exittail) -#endif - - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_2) - - .p2align 4 -L(ashr_2_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $2, %xmm0 - psrldq $2, %xmm3 - jmp L(aftertail) - -/* - * The 
following cases will be handled by ashr_3 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(13~15) n -13 2(15 +(n-13) - n) ashr_3 - */ - .p2align 4 -L(ashr_3): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $13, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -13(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $3, FLAGS - lea 3(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_3): - add $16, %edi - jg L(nibble_ashr_3) - -L(gobble_ashr_3): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $3, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_3) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $3, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_3) - - .p2align 4 -L(nibble_ashr_3): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfff8, %esi - jnz L(ashr_3_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $13, REM - jbe L(ashr_3_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_3) - - .p2align 4 
-L(ashr_3_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $3, %xmm0 - psrldq $3, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_4 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(12~15) n -12 3(15 +(n-12) - n) ashr_4 - */ - .p2align 4 -L(ashr_4): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $12, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -12(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $4, FLAGS - lea 4(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_4): - add $16, %edi - jg L(nibble_ashr_4) - -L(gobble_ashr_4): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $4, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_4) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $4, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_4) - - .p2align 4 -L(nibble_ashr_4): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfff0, %esi - jnz L(ashr_4_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $12, 
REM - jbe L(ashr_4_exittail) -#endif - - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_4) - - .p2align 4 -L(ashr_4_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $4, %xmm0 - psrldq $4, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_5 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(11~15) n -11 4(15 +(n-11) - n) ashr_5 - */ - .p2align 4 -L(ashr_5): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $11, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -11(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $5, FLAGS - lea 5(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_5): - add $16, %edi - jg L(nibble_ashr_5) - -L(gobble_ashr_5): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $5, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_5) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $5, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_5) - - .p2align 4 -L(nibble_ashr_5): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test 
$0xffe0, %esi - jnz L(ashr_5_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $11, REM - jbe L(ashr_5_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_5) - - .p2align 4 -L(ashr_5_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $5, %xmm0 - psrldq $5, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_6 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(10~15) n -10 5(15 +(n-10) - n) ashr_6 - */ - - .p2align 4 -L(ashr_6): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $10, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -10(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $6, FLAGS - lea 6(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_6): - add $16, %edi - jg L(nibble_ashr_6) - -L(gobble_ashr_6): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $6, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_6) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $6, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, 
%xmm3 - jmp L(loop_ashr_6) - - .p2align 4 -L(nibble_ashr_6): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xffc0, %esi - jnz L(ashr_6_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $10, REM - jbe L(ashr_6_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_6) - - .p2align 4 -L(ashr_6_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $6, %xmm0 - psrldq $6, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_7 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7 - */ - - .p2align 4 -L(ashr_7): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $9, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -9(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $7, FLAGS - lea 8(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_7): - add $16, %edi - jg L(nibble_ashr_7) - -L(gobble_ashr_7): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $7, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_7) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $7, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined 
USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_7) - - .p2align 4 -L(nibble_ashr_7): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xff80, %esi - jnz L(ashr_7_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $9, REM - jbe L(ashr_7_exittail) -#endif - pxor %xmm0, %xmm0 - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_7) - - .p2align 4 -L(ashr_7_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $7, %xmm0 - psrldq $7, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_8 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8 - */ - .p2align 4 -L(ashr_8): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $8, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -8(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $8, FLAGS - lea 8(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_8): - add $16, %edi - jg L(nibble_ashr_8) - -L(gobble_ashr_8): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $8, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_8) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $8, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, 
%xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_8) - - .p2align 4 -L(nibble_ashr_8): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xff00, %esi - jnz L(ashr_8_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $8, REM - jbe L(ashr_8_exittail) -#endif - pxor %xmm0, %xmm0 - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_8) - - .p2align 4 -L(ashr_8_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $8, %xmm0 - psrldq $8, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_9 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9 - */ - .p2align 4 -L(ashr_9): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $7, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -7(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $9, FLAGS - lea 9(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_9): - add $16, %edi - jg L(nibble_ashr_9) - -L(gobble_ashr_9): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $9, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_9) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), 
%xmm2 - movdqa %xmm2, %xmm4 - - palignr $9, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_9) - - .p2align 4 -L(nibble_ashr_9): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfe00, %esi - jnz L(ashr_9_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(ashr_9_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_9) - - .p2align 4 -L(ashr_9_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $9, %xmm0 - psrldq $9, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_10 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10 - */ - .p2align 4 -L(ashr_10): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $6, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -6(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $10, FLAGS - lea 10(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_10): - add $16, %edi - jg L(nibble_ashr_10) - -L(gobble_ashr_10): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $10, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - 
movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_10) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $10, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_10) - - .p2align 4 -L(nibble_ashr_10): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xfc00, %esi - jnz L(ashr_10_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - jbe L(ashr_10_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_10) - - .p2align 4 -L(ashr_10_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $10, %xmm0 - psrldq $10, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_11 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11 - */ - .p2align 4 -L(ashr_11): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $5, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -5(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $11, FLAGS - lea 11(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_11): - add $16, %edi - jg L(nibble_ashr_11) - -L(gobble_ashr_11): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $11, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined 
USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_11) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $11, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_11) - - .p2align 4 -L(nibble_ashr_11): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xf800, %esi - jnz L(ashr_11_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - jbe L(ashr_11_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_11) - - .p2align 4 -L(ashr_11_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $11, %xmm0 - psrldq $11, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_12 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12 - */ - .p2align 4 -L(ashr_12): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $4, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -4(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $12, FLAGS - lea 12(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_12): - add $16, %edi - jg L(nibble_ashr_12) - -L(gobble_ashr_12): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $12, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb 
%xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_12) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $12, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_12) - - .p2align 4 -L(nibble_ashr_12): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xf000, %esi - jnz L(ashr_12_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - jbe L(ashr_12_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_12) - - .p2align 4 -L(ashr_12_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $12, %xmm0 - psrldq $12, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_13 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13 - */ - .p2align 4 -L(ashr_13): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $3, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -3(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $13, FLAGS - lea 13(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_13): - add $16, %edi - jg L(nibble_ashr_13) - -L(gobble_ashr_13): - movdqa (%eax, 
%ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $13, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_13) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $13, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_13) - - .p2align 4 -L(nibble_ashr_13): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xe000, %esi - jnz L(ashr_13_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - jbe L(ashr_13_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_13) - - .p2align 4 -L(ashr_13_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $13, %xmm0 - psrldq $13, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14 - */ - .p2align 4 -L(ashr_14): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $2, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -2(%ecx), %edi - jnz L(less32bytes) - - UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $14, FLAGS - lea 14(%edx), %edi - and $0xfff, %edi - sub 
$0x1000, %edi - - .p2align 4 -L(loop_ashr_14): - add $16, %edi - jg L(nibble_ashr_14) - -L(gobble_ashr_14): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $14, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_14) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $14, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_14) - - .p2align 4 -L(nibble_ashr_14): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0xc000, %esi - jnz L(ashr_14_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - jbe L(ashr_14_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_14) - - .p2align 4 -L(ashr_14_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $14, %xmm0 - psrldq $14, %xmm3 - jmp L(aftertail) - -/* - * The following cases will be handled by ashr_14 - * ecx(offset of esi) eax(offset of edi) relative offset corresponding case - * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15 - */ - - .p2align 4 -L(ashr_15): - mov $0xffff, %esi - pxor %xmm0, %xmm0 - movdqa (%edx), %xmm2 - movdqa (%eax), %xmm1 - pcmpeqb %xmm1, %xmm0 - pslldq $1, %xmm2 - TOLOWER (%xmm1, %xmm2) - pcmpeqb %xmm1, %xmm2 - psubb %xmm0, %xmm2 - pmovmskb %xmm2, %edi - shr %cl, %esi - shr %cl, %edi - sub %edi, %esi - lea -1(%ecx), %edi - jnz L(less32bytes) - - 
UPDATE_STRNCMP_COUNTER - - movdqa (%edx), %xmm3 - pxor %xmm0, %xmm0 - mov $16, %ecx - orl $15, FLAGS - lea 15(%edx), %edi - and $0xfff, %edi - sub $0x1000, %edi - - .p2align 4 -L(loop_ashr_15): - add $16, %edi - jg L(nibble_ashr_15) - -L(gobble_ashr_15): - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $15, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - - add $16, %edi - jg L(nibble_ashr_15) - - movdqa (%eax, %ecx), %xmm1 - movdqa (%edx, %ecx), %xmm2 - movdqa %xmm2, %xmm4 - - palignr $15, %xmm3, %xmm2 - TOLOWER (%xmm1, %xmm2) - - pcmpeqb %xmm1, %xmm0 - pcmpeqb %xmm2, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - sub $0xffff, %esi - jnz L(exit) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $16, REM - lea -16(REM), REM - jbe L(more8byteseq) -#endif - add $16, %ecx - movdqa %xmm4, %xmm3 - jmp L(loop_ashr_15) - - .p2align 4 -L(nibble_ashr_15): - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %esi - test $0x8000, %esi - jnz L(ashr_15_exittail) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - jbe L(ashr_15_exittail) -#endif - pxor %xmm0, %xmm0 - sub $0x1000, %edi - jmp L(gobble_ashr_15) - - .p2align 4 -L(ashr_15_exittail): - movdqa (%eax, %ecx), %xmm1 - psrldq $15, %xmm0 - psrldq $15, %xmm3 - jmp L(aftertail) - - .p2align 4 -L(aftertail): - TOLOWER (%xmm1, %xmm3) - pcmpeqb %xmm3, %xmm1 - psubb %xmm0, %xmm1 - pmovmskb %xmm1, %esi - not %esi -L(exit): - mov FLAGS, %edi - and $0x1f, %edi - lea -16(%edi, %ecx), %edi -L(less32bytes): - add %edi, %edx - add %ecx, %eax - testl $0x20, FLAGS - jz L(ret2) - xchg %eax, %edx - - .p2align 4 -L(ret2): - mov %esi, %ecx -#if defined USE_AS_STRCASECMP_L || defined 
USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -#endif - POP (%esi) - POP (%edi) -#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L - POP (FLAGS) -#endif -L(less16bytes): - test %cl, %cl - jz L(2next_8_bytes) - - test $0x01, %cl - jnz L(Byte0) - - test $0x02, %cl - jnz L(Byte1) - - test $0x04, %cl - jnz L(Byte2) - - test $0x08, %cl - jnz L(Byte3) - - test $0x10, %cl - jnz L(Byte4) - - test $0x20, %cl - jnz L(Byte5) - - test $0x40, %cl - jnz L(Byte6) -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(eq) -#endif - - movzx 7(%eax), %ecx - movzx 7(%edx), %eax -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte0): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $0, REM - jbe L(eq) -#endif - movzx (%eax), %ecx - movzx (%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte1): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $1, REM - jbe L(eq) -#endif - movzx 1(%eax), %ecx - movzx 1(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - 
RETURN - -L(Byte2): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $2, REM - jbe L(eq) -#endif - movzx 2(%eax), %ecx - movzx 2(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte3): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $3, REM - jbe L(eq) -#endif - movzx 3(%eax), %ecx - movzx 3(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte4): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $4, REM - jbe L(eq) -#endif - movzx 4(%eax), %ecx - movzx 4(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte5): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $5, REM - jbe L(eq) -#endif - movzx 5(%eax), %ecx - movzx 5(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), 
%eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(Byte6): -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $6, REM - jbe L(eq) -#endif - movzx 6(%eax), %ecx - movzx 6(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -L(2next_8_bytes): - add $8, %eax - add $8, %edx -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $8, REM - lea -8(REM), REM - jbe L(eq) -#endif - - test $0x01, %ch - jnz L(Byte0) - - test $0x02, %ch - jnz L(Byte1) - - test $0x04, %ch - jnz L(Byte2) - - test $0x08, %ch - jnz L(Byte3) - - test $0x10, %ch - jnz L(Byte4) - - test $0x20, %ch - jnz L(Byte5) - - test $0x40, %ch - jnz L(Byte6) - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - cmp $7, REM - jbe L(eq) -#endif - movzx 7(%eax), %ecx - movzx 7(%edx), %eax - -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax -# endif -#endif - - sub %ecx, %eax - RETURN - -#ifdef USE_AS_STRNCMP -L(neq_sncmp): -#endif -L(neq): - mov $1, %eax - ja L(neq_bigger) - neg %eax -L(neq_bigger): -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -#endif -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - ret - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - .p2align 4 - cfi_restore_state 
-L(more8byteseq): - -# ifdef USE_AS_STRNCASECMP_L - addl $4, %esp - cfi_adjust_cfa_offset (-4) -# endif - POP (%esi) - POP (%edi) -# ifdef USE_AS_STRNCMP - POP (FLAGS) -# endif -#endif - -#ifdef USE_AS_STRNCMP -L(eq_sncmp): -#endif -L(eq): - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - POP (REM) -#endif -#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L -# ifdef PIC - POP (%ebx) -# endif -#endif - xorl %eax, %eax - ret - -#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - .p2align 4 -# if defined USE_AS_STRNCASECMP_L && defined PIC - CFI_PUSH (%ebx) -# endif - CFI_PUSH (REM) -L(less16bytes_sncmp): -# ifdef USE_AS_STRNCASECMP_L - PUSH (%esi) -# endif - test REM, REM - jz L(eq_sncmp) - - movzbl (%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl (%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, (%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $1, REM - je L(eq_sncmp) - - movzbl 1(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 1(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 1(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $2, REM - je L(eq_sncmp) - - movzbl 2(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 2(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), 
%esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 2(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $3, REM - je L(eq_sncmp) - - movzbl 3(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 3(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 3(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $4, REM - je L(eq_sncmp) - - movzbl 4(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 4(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 4(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $5, REM - je L(eq_sncmp) - - movzbl 5(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 5(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 5(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $6, REM - je L(eq_sncmp) - - movzbl 6(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 6(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 6(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl 
- je L(eq_sncmp) - - cmp $7, REM - je L(eq_sncmp) - - movzbl 7(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 7(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 7(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - - cmp $8, REM - je L(eq_sncmp) - - movzbl 8(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 8(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 8(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $9, REM - je L(eq_sncmp) - - movzbl 9(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 9(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 9(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $10, REM - je L(eq_sncmp) - - movzbl 10(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 10(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 10(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $11, REM - je L(eq_sncmp) - - movzbl 11(%eax), %ecx -# ifdef 
USE_AS_STRNCASECMP_L - movzbl 11(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 11(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - - cmp $12, REM - je L(eq_sncmp) - - movzbl 12(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 12(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 12(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $13, REM - je L(eq_sncmp) - - movzbl 13(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 13(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 13(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $14, REM - je L(eq_sncmp) - - movzbl 14(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 14(%edx), %esi -# ifdef PIC - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 14(%edx) -# endif - jne L(neq_sncmp) - test %cl, %cl - je L(eq_sncmp) - - cmp $15, REM - je L(eq_sncmp) - - movzbl 15(%eax), %ecx -# ifdef USE_AS_STRNCASECMP_L - movzbl 15(%edx), %esi -# ifdef PIC - movl 
_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi -# else - movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx - movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi -# endif - cmpl %ecx, %esi -# else - cmpb %cl, 15(%edx) -# endif - jne L(neq_sncmp) - -# ifdef USE_AS_STRNCASECMP_L -L(eq_sncmp): - POP (%esi) -# endif - POP (REM) -# if defined USE_AS_STRNCASECMP_L && defined PIC - POP (%ebx) -# endif - xor %eax, %eax - ret - -# ifdef USE_AS_STRNCASECMP_L - .p2align 4 -# ifdef PIC - CFI_PUSH (%ebx) -# endif - CFI_PUSH (REM) - CFI_PUSH (%esi) -L(neq_sncmp): - mov $1, %eax - mov $-1, %edx - cmovna %edx, %eax - POP (%esi) - POP (REM) -# ifdef PIC - POP (%ebx) -# endif - ret -# endif -#endif - -END (STRCMP) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S deleted file mode 100644 index 56de25a4b7..0000000000 --- a/sysdeps/i386/i686/multiarch/strcmp.S +++ /dev/null @@ -1,95 +0,0 @@ -/* Multiple versions of strcmp - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#ifdef USE_AS_STRNCMP -# define STRCMP strncmp -# define __GI_STRCMP __GI_strncmp -# define __STRCMP_IA32 __strncmp_ia32 -# define __STRCMP_SSSE3 __strncmp_ssse3 -# define __STRCMP_SSE4_2 __strncmp_sse4_2 -#elif defined USE_AS_STRCASECMP_L -# define STRCMP __strcasecmp_l -# define __GI_STRCMP __GI_strcasecmp_l -# define __STRCMP_IA32 __strcasecmp_l_ia32 -# define __STRCMP_SSSE3 __strcasecmp_l_ssse3 -# define __STRCMP_SSE4_2 __strcasecmp_l_sse4_2 -#elif defined USE_AS_STRNCASECMP_L -# define STRCMP __strncasecmp_l -# define __GI_STRCMP __GI_strncasecmp_l -# define __STRCMP_IA32 __strncasecmp_l_ia32 -# define __STRCMP_SSSE3 __strncasecmp_l_ssse3 -# define __STRCMP_SSE4_2 __strncasecmp_l_sse4_2 -#else -# define STRCMP strcmp -# define __GI_STRCMP __GI_strcmp -# define __STRCMP_IA32 __strcmp_ia32 -# define __STRCMP_SSSE3 __strcmp_ssse3 -# define __STRCMP_SSE4_2 __strcmp_sse4_2 -#endif - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strncmp in static library since we - need strncmp before the initialization happened. 
*/ -#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc) - .text -ENTRY(STRCMP) - .type STRCMP, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__STRCMP_IA32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__STRCMP_SSSE3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - HAS_ARCH_FEATURE (Slow_SSE4_2) - jnz 2f - LOAD_FUNC_GOT_EAX (__STRCMP_SSE4_2) -2: ret -END(STRCMP) - -# undef ENTRY -# define ENTRY(name) \ - .type __STRCMP_IA32, @function; \ - .p2align 4; \ - .globl __STRCMP_IA32; \ - .hidden __STRCMP_IA32; \ - __STRCMP_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32 -# endif -#endif - -#if !defined USE_AS_STRNCMP && !defined USE_AS_STRCASECMP_L \ - && !defined USE_AS_STRNCASECMP_L -# include "../strcmp.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S deleted file mode 100644 index ed627a5f62..0000000000 --- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S +++ /dev/null @@ -1,2250 +0,0 @@ -/* strcpy with SSE2 and unaligned load - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# include <sysdep.h> - - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef STRCPY -# define STRCPY __strcpy_sse2 -# endif - -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - -# ifdef USE_AS_STRNCPY -# define PARMS 16 -# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi) -# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \ - CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi); - -# ifdef SHARED -# define JMPTBL(I, B) I - B - -/* Load an entry in a jump table into ECX and branch to it. TABLE is a - jump table with relative offsets. - INDEX is a register contains the index into the jump table. - SCALE is the scale of INDEX. */ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - /* We first load PC into ECX. */ \ - SETUP_PIC_REG(cx); \ - /* Get the address of the jump table. */ \ - addl $(TABLE - .), %ecx; \ - /* Get the entry and convert the relative offset to the \ - absolute address. */ \ - addl (%ecx,INDEX,SCALE), %ecx; \ - /* We loaded the jump table and adjusted ECX. Go. */ \ - jmp *%ecx -# else -# define JMPTBL(I, B) I - -/* Branch to an entry in a jump table. TABLE is a jump table with - absolute offsets. INDEX is a register contains the index into the - jump table. SCALE is the scale of INDEX. 
*/ - -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ - jmp *TABLE(,INDEX,SCALE) -# endif - -.text -ENTRY (STRCPY) - ENTRANCE - mov STR1(%esp), %edi - mov STR2(%esp), %esi - movl LEN(%esp), %ebx - test %ebx, %ebx - jz L(ExitZero) - - mov %esi, %ecx -# ifndef USE_AS_STPCPY - mov %edi, %eax /* save result */ -# endif - and $15, %ecx - jz L(SourceStringAlignmentZero) - - and $-16, %esi - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - - pcmpeqb (%esi), %xmm1 - add %ecx, %ebx - pmovmskb %xmm1, %edx - shr %cl, %edx -# ifdef USE_AS_STPCPY - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# else - cmp $17, %ebx - jbe L(CopyFrom1To16BytesTailCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail) - - pcmpeqb 16(%esi), %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $32, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# else - cmp $33, %ebx - jbe L(CopyFrom1To32BytesCase2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes) - - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ - movdqu %xmm1, (%edi) - - sub %ecx, %edi - -/* If source address alignment != destination address alignment */ - .p2align 4 -L(Unalign16Both): - mov $16, %ecx - movdqa (%esi, %ecx), %xmm1 - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%edi, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $48, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm2) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%edi, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm3) - - movaps 16(%esi, %ecx), %xmm4 - movdqu %xmm3, (%edi, %ecx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm4) - - movaps 16(%esi, %ecx), %xmm1 - movdqu %xmm4, (%edi, %ecx) - 
pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm1) - - movaps 16(%esi, %ecx), %xmm2 - movdqu %xmm1, (%edi, %ecx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm2) - - movaps 16(%esi, %ecx), %xmm3 - movdqu %xmm2, (%edi, %ecx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %edx - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm3) - - movdqu %xmm3, (%edi, %ecx) - mov %esi, %edx - lea 16(%esi, %ecx), %esi - and $-0x40, %esi - sub %esi, %edx - sub %edx, %edi - lea 128(%ebx, %edx), %ebx - -L(Unaligned64Loop): - movaps (%esi), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%esi), %xmm5 - movaps 32(%esi), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%esi), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) - test %edx, %edx - jnz L(Unaligned64Leave) -L(Unaligned64Loop_start): - add $64, %edi - add $64, %esi - movdqu %xmm4, -64(%edi) - movaps (%esi), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%edi) - movaps 16(%esi), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%esi), %xmm3 - movdqu %xmm6, -32(%edi) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%edi) - movaps 48(%esi), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %edx - sub $64, %ebx - jbe L(UnalignedLeaveCase2OrCase3) - test %edx, %edx - jz L(Unaligned64Loop_start) -L(Unaligned64Leave): - pxor %xmm1, %xmm1 - - pcmpeqb %xmm4, %xmm0 - pcmpeqb %xmm5, %xmm1 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_0) - test %ecx, %ecx - jnz L(CopyFrom1To16BytesUnaligned_16) - - pcmpeqb %xmm6, %xmm0 - pcmpeqb %xmm7, %xmm1 - 
pmovmskb %xmm0, %edx - pmovmskb %xmm1, %ecx - test %edx, %edx - jnz L(CopyFrom1To16BytesUnaligned_32) - - bsf %ecx, %edx - movdqu %xmm4, (%edi) - movdqu %xmm5, 16(%edi) - movdqu %xmm6, 32(%edi) -# ifdef USE_AS_STPCPY - lea 48(%edi, %edx), %eax -# endif - movdqu %xmm7, 48(%edi) - add $15, %ebx - sub %edx, %ebx - lea 49(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - -/* If source address alignment == destination address alignment */ - -L(SourceStringAlignmentZero): - pxor %xmm0, %xmm0 - movdqa (%esi), %xmm1 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $16, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# else - cmp $17, %ebx - jbe L(CopyFrom1To16BytesTail1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1) - - pcmpeqb 16(%esi), %xmm0 - movdqu %xmm1, (%edi) - pmovmskb %xmm0, %edx -# ifdef USE_AS_STPCPY - cmp $32, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# else - cmp $33, %ebx - jbe L(CopyFrom1To32Bytes1Case2OrCase3) -# endif - test %edx, %edx - jnz L(CopyFrom1To32Bytes1) - - jmp L(Unalign16Both) - -/*-----------------End of main part---------------------------*/ - -/* Case1 */ - .p2align 4 -L(CopyFrom1To16BytesTail): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1): - add $16, %esi - add $16, %edi - sub $16, %ebx -L(CopyFrom1To16BytesTail1): - bsf %edx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes): - sub %ecx, %ebx - bsf %edx, %edx - add %ecx, %esi - add $16, %edx - sub %ecx, %edx - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_0): - bsf %edx, %edx -# ifdef USE_AS_STPCPY - lea (%edi, %edx), %eax -# endif - movdqu %xmm4, (%edi) - add $63, %ebx - sub %edx, %ebx - lea 1(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_16): - bsf %ecx, %edx - movdqu %xmm4, (%edi) -# ifdef 
USE_AS_STPCPY - lea 16(%edi, %edx), %eax -# endif - movdqu %xmm5, 16(%edi) - add $47, %ebx - sub %edx, %ebx - lea 17(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnaligned_32): - bsf %edx, %edx - movdqu %xmm4, (%edi) - movdqu %xmm5, 16(%edi) -# ifdef USE_AS_STPCPY - lea 32(%edi, %edx), %eax -# endif - movdqu %xmm6, 32(%edi) - add $31, %ebx - sub %edx, %ebx - lea 33(%edi, %edx), %edi - jmp L(StrncpyFillTailWithZero) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm6): - movdqu %xmm6, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm5): - movdqu %xmm5, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm4): - movdqu %xmm4, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm3): - movdqu %xmm3, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesUnalignedXmm1): - movdqu %xmm1, (%edi, %ecx) - jmp L(CopyFrom1To16BytesXmmExit) - - .p2align 4 -L(CopyFrom1To16BytesExit): - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - -/* Case2 */ - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %ecx, %edi - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - add $16, %edx - sub %ecx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -L(CopyFrom1To16BytesTailCase2): - sub %ecx, %ebx - add %ecx, %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -L(CopyFrom1To16BytesTail1Case2): - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - -/* Case2 or Case3, Case3 */ - - .p2align 4 
-L(CopyFrom1To16BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesCase2) -L(CopyFrom1To16BytesCase3): - add $16, %ebx - add %ecx, %edi - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32BytesCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To32BytesCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To16BytesTailCase2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTailCase2) - sub %ecx, %ebx - add %ecx, %esi - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(CopyFrom1To32Bytes1Case2OrCase3): - add $16, %edi - add $16, %esi - sub $16, %ebx -L(CopyFrom1To16BytesTail1Case2OrCase3): - test %edx, %edx - jnz L(CopyFrom1To16BytesTail1Case2) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(Exit0): -# ifdef USE_AS_STPCPY - mov %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit1): - movb %dh, (%edi) -# ifdef USE_AS_STPCPY - lea (%edi), %eax -# endif - sub $1, %ebx - lea 1(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit2): - movw (%esi), %dx - movw %dx, (%edi) -# ifdef USE_AS_STPCPY - lea 1(%edi), %eax -# endif - sub $2, %ebx - lea 2(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit3): - movw (%esi), %cx - movw %cx, (%edi) - movb %dh, 2(%edi) -# ifdef USE_AS_STPCPY - lea 2(%edi), %eax -# endif - sub $3, %ebx - lea 3(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit4): - movl (%esi), %edx - movl %edx, (%edi) -# ifdef USE_AS_STPCPY - lea 3(%edi), %eax -# endif - sub $4, %ebx - lea 4(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit5): - movl (%esi), %ecx - movb %dh, 4(%edi) - movl %ecx, (%edi) -# ifdef USE_AS_STPCPY - lea 4(%edi), %eax -# endif - sub $5, %ebx - lea 5(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit6): - movl (%esi), %ecx - movw 
4(%esi), %dx - movl %ecx, (%edi) - movw %dx, 4(%edi) -# ifdef USE_AS_STPCPY - lea 5(%edi), %eax -# endif - sub $6, %ebx - lea 6(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit7): - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%edi) - movl %edx, 3(%edi) -# ifdef USE_AS_STPCPY - lea 6(%edi), %eax -# endif - sub $7, %ebx - lea 7(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 7(%edi), %eax -# endif - sub $8, %ebx - lea 8(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit9): - movlpd (%esi), %xmm0 - movb %dh, 8(%edi) - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 8(%edi), %eax -# endif - sub $9, %ebx - lea 9(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 9(%edi), %eax -# endif - sub $10, %ebx - lea 10(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) -# ifdef USE_AS_STPCPY - lea 10(%edi), %eax -# endif - sub $11, %ebx - lea 11(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 11(%edi), %eax -# endif - sub $12, %ebx - lea 12(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 5(%edi) -# ifdef USE_AS_STPCPY - lea 12(%edi), %eax -# endif - sub $13, %ebx - lea 13(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 6(%edi) -# ifdef USE_AS_STPCPY - lea 13(%edi), %eax -# 
endif - sub $14, %ebx - lea 14(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 7(%edi) -# ifdef USE_AS_STPCPY - lea 14(%edi), %eax -# endif - sub $15, %ebx - lea 15(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 15(%edi), %eax -# endif - sub $16, %ebx - lea 16(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit17): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) - movb %dh, 16(%edi) -# ifdef USE_AS_STPCPY - lea 16(%edi), %eax -# endif - sub $17, %ebx - lea 17(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%edi) - movw %cx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 17(%edi), %eax -# endif - sub $18, %ebx - lea 18(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 15(%edi) -# ifdef USE_AS_STPCPY - lea 18(%edi), %eax -# endif - sub $19, %ebx - lea 19(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 19(%edi), %eax -# endif - sub $20, %ebx - lea 20(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) - movb %dh, 20(%edi) -# ifdef USE_AS_STPCPY - lea 20(%edi), %eax -# endif - sub $21, %ebx - lea 21(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 14(%edi) -# ifdef USE_AS_STPCPY - lea 21(%edi), %eax -# endif - sub $22, %ebx - lea 22(%edi), %edi - 
jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 15(%edi) -# ifdef USE_AS_STPCPY - lea 22(%edi), %eax -# endif - sub $23, %ebx - lea 23(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 23(%edi), %eax -# endif - sub $24, %ebx - lea 24(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movb %dh, 24(%edi) -# ifdef USE_AS_STPCPY - lea 24(%edi), %eax -# endif - sub $25, %ebx - lea 25(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movw %cx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 25(%edi), %eax -# endif - sub $26, %ebx - lea 26(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 23(%edi) -# ifdef USE_AS_STPCPY - lea 26(%edi), %eax -# endif - sub $27, %ebx - lea 27(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 27(%edi), %eax -# endif - sub $28, %ebx - lea 28(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 13(%edi) -# ifdef USE_AS_STPCPY - lea 28(%edi), %eax -# endif - sub $29, %ebx - lea 29(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 
-L(Exit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 14(%edi) -# ifdef USE_AS_STPCPY - lea 29(%edi), %eax -# endif - sub $30, %ebx - lea 30(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - - .p2align 4 -L(Exit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 15(%edi) -# ifdef USE_AS_STPCPY - lea 30(%edi), %eax -# endif - sub $31, %ebx - lea 31(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(Exit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 31(%edi), %eax -# endif - sub $32, %ebx - lea 32(%edi), %edi - jnz L(StrncpyFillTailWithZero) - RETURN - - .p2align 4 -L(StrncpyExit1): - movb (%esi), %dl - movb %dl, (%edi) -# ifdef USE_AS_STPCPY - lea 1(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit2): - movw (%esi), %dx - movw %dx, (%edi) -# ifdef USE_AS_STPCPY - lea 2(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit3): - movw (%esi), %cx - movb 2(%esi), %dl - movw %cx, (%edi) - movb %dl, 2(%edi) -# ifdef USE_AS_STPCPY - lea 3(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit4): - movl (%esi), %edx - movl %edx, (%edi) -# ifdef USE_AS_STPCPY - lea 4(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit5): - movl (%esi), %ecx - movb 4(%esi), %dl - movl %ecx, (%edi) - movb %dl, 4(%edi) -# ifdef USE_AS_STPCPY - lea 5(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit6): - movl (%esi), %ecx - movw 4(%esi), %dx - movl %ecx, (%edi) - movw %dx, 4(%edi) -# ifdef USE_AS_STPCPY - lea 6(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit7): - movl (%esi), %ecx - movl 3(%esi), %edx - movl %ecx, (%edi) - movl %edx, 3(%edi) -# ifdef USE_AS_STPCPY - lea 7(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit8): - movlpd (%esi), %xmm0 - movlpd %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 8(%edi), %eax -# endif - RETURN - - .p2align 
4 -L(StrncpyExit9): - movlpd (%esi), %xmm0 - movb 8(%esi), %dl - movlpd %xmm0, (%edi) - movb %dl, 8(%edi) -# ifdef USE_AS_STPCPY - lea 9(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit10): - movlpd (%esi), %xmm0 - movw 8(%esi), %dx - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 10(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit11): - movlpd (%esi), %xmm0 - movl 7(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) -# ifdef USE_AS_STPCPY - lea 11(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit12): - movlpd (%esi), %xmm0 - movl 8(%esi), %edx - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) -# ifdef USE_AS_STPCPY - lea 12(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit13): - movlpd (%esi), %xmm0 - movlpd 5(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 5(%edi) -# ifdef USE_AS_STPCPY - lea 13(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit14): - movlpd (%esi), %xmm0 - movlpd 6(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 6(%edi) -# ifdef USE_AS_STPCPY - lea 14(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit15): - movlpd (%esi), %xmm0 - movlpd 7(%esi), %xmm1 - movlpd %xmm0, (%edi) - movlpd %xmm1, 7(%edi) -# ifdef USE_AS_STPCPY - lea 15(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit16): - movdqu (%esi), %xmm0 - movdqu %xmm0, (%edi) -# ifdef USE_AS_STPCPY - lea 16(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit17): - movdqu (%esi), %xmm0 - movb 16(%esi), %cl - movdqu %xmm0, (%edi) - movb %cl, 16(%edi) -# ifdef USE_AS_STPCPY - lea 17(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit18): - movdqu (%esi), %xmm0 - movw 16(%esi), %cx - movdqu %xmm0, (%edi) - movw %cx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 18(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit19): - movdqu (%esi), %xmm0 - movl 15(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 15(%edi) -# ifdef USE_AS_STPCPY - lea 19(%edi), %eax -# endif - RETURN - - .p2align 4 
-L(StrncpyExit20): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) -# ifdef USE_AS_STPCPY - lea 20(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit21): - movdqu (%esi), %xmm0 - movl 16(%esi), %ecx - movb 20(%esi), %dl - movdqu %xmm0, (%edi) - movl %ecx, 16(%edi) - movb %dl, 20(%edi) -# ifdef USE_AS_STPCPY - lea 21(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit22): - movdqu (%esi), %xmm0 - movlpd 14(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 14(%edi) -# ifdef USE_AS_STPCPY - lea 22(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit23): - movdqu (%esi), %xmm0 - movlpd 15(%esi), %xmm3 - movdqu %xmm0, (%edi) - movlpd %xmm3, 15(%edi) -# ifdef USE_AS_STPCPY - lea 23(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit24): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 24(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit25): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movb 24(%esi), %cl - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movb %cl, 24(%edi) -# ifdef USE_AS_STPCPY - lea 25(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit26): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movw 24(%esi), %cx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movw %cx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 26(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit27): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 23(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 23(%edi) -# ifdef USE_AS_STPCPY - lea 27(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit28): - movdqu (%esi), %xmm0 - movlpd 16(%esi), %xmm2 - movl 24(%esi), %ecx - movdqu %xmm0, (%edi) - movlpd %xmm2, 16(%edi) - movl %ecx, 24(%edi) -# ifdef USE_AS_STPCPY - lea 28(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit29): - movdqu (%esi), %xmm0 - movdqu 13(%esi), %xmm2 - 
movdqu %xmm0, (%edi) - movdqu %xmm2, 13(%edi) -# ifdef USE_AS_STPCPY - lea 29(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit30): - movdqu (%esi), %xmm0 - movdqu 14(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 14(%edi) -# ifdef USE_AS_STPCPY - lea 30(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit31): - movdqu (%esi), %xmm0 - movdqu 15(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 15(%edi) -# ifdef USE_AS_STPCPY - lea 31(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit32): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) -# ifdef USE_AS_STPCPY - lea 32(%edi), %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit33): - movdqu (%esi), %xmm0 - movdqu 16(%esi), %xmm2 - movb 32(%esi), %cl - movdqu %xmm0, (%edi) - movdqu %xmm2, 16(%edi) - movb %cl, 32(%edi) - RETURN - - .p2align 4 -L(Fill0): - RETURN - - .p2align 4 -L(Fill1): - movb %dl, (%edi) - RETURN - - .p2align 4 -L(Fill2): - movw %dx, (%edi) - RETURN - - .p2align 4 -L(Fill3): - movl %edx, -1(%edi) - RETURN - - .p2align 4 -L(Fill4): - movl %edx, (%edi) - RETURN - - .p2align 4 -L(Fill5): - movl %edx, (%edi) - movb %dl, 4(%edi) - RETURN - - .p2align 4 -L(Fill6): - movl %edx, (%edi) - movw %dx, 4(%edi) - RETURN - - .p2align 4 -L(Fill7): - movlpd %xmm0, -1(%edi) - RETURN - - .p2align 4 -L(Fill8): - movlpd %xmm0, (%edi) - RETURN - - .p2align 4 -L(Fill9): - movlpd %xmm0, (%edi) - movb %dl, 8(%edi) - RETURN - - .p2align 4 -L(Fill10): - movlpd %xmm0, (%edi) - movw %dx, 8(%edi) - RETURN - - .p2align 4 -L(Fill11): - movlpd %xmm0, (%edi) - movl %edx, 7(%edi) - RETURN - - .p2align 4 -L(Fill12): - movlpd %xmm0, (%edi) - movl %edx, 8(%edi) - RETURN - - .p2align 4 -L(Fill13): - movlpd %xmm0, (%edi) - movlpd %xmm0, 5(%edi) - RETURN - - .p2align 4 -L(Fill14): - movlpd %xmm0, (%edi) - movlpd %xmm0, 6(%edi) - RETURN - - .p2align 4 -L(Fill15): - movdqu %xmm0, -1(%edi) - RETURN - - .p2align 4 -L(Fill16): - movdqu %xmm0, (%edi) - RETURN - - 
.p2align 4 -L(CopyFrom1To16BytesUnalignedXmm2): - movdqu %xmm2, (%edi, %ecx) - - .p2align 4 -L(CopyFrom1To16BytesXmmExit): - bsf %edx, %edx - add $15, %ebx - add %ecx, %edi -# ifdef USE_AS_STPCPY - lea (%edi, %edx), %eax -# endif - sub %edx, %ebx - lea 1(%edi, %edx), %edi - - .p2align 4 -L(StrncpyFillTailWithZero): - pxor %xmm0, %xmm0 - xor %edx, %edx - sub $16, %ebx - jbe L(StrncpyFillExit) - - movdqu %xmm0, (%edi) - add $16, %edi - - mov %edi, %esi - and $0xf, %esi - sub %esi, %edi - add %esi, %ebx - sub $64, %ebx - jb L(StrncpyFillLess64) - -L(StrncpyFillLoopMovdqa): - movdqa %xmm0, (%edi) - movdqa %xmm0, 16(%edi) - movdqa %xmm0, 32(%edi) - movdqa %xmm0, 48(%edi) - add $64, %edi - sub $64, %ebx - jae L(StrncpyFillLoopMovdqa) - -L(StrncpyFillLess64): - add $32, %ebx - jl L(StrncpyFillLess32) - movdqa %xmm0, (%edi) - movdqa %xmm0, 16(%edi) - add $32, %edi - sub $16, %ebx - jl L(StrncpyFillExit) - movdqa %xmm0, (%edi) - add $16, %edi - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - -L(StrncpyFillLess32): - add $16, %ebx - jl L(StrncpyFillExit) - movdqa %xmm0, (%edi) - add $16, %edi - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - -L(StrncpyFillExit): - add $16, %ebx - BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) - - .p2align 4 -L(UnalignedLeaveCase2OrCase3): - test %edx, %edx - jnz L(Unaligned64LeaveCase2) -L(Unaligned64LeaveCase3): - lea 64(%ebx), %ecx - and $-16, %ecx - add $48, %ebx - jl L(CopyFrom1To16BytesCase3) - movdqu %xmm4, (%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm5, 16(%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm6, 32(%edi) - sub $16, %ebx - jb L(CopyFrom1To16BytesCase3) - movdqu %xmm7, 48(%edi) -# ifdef USE_AS_STPCPY - lea 64(%edi), %eax -# endif - RETURN - - .p2align 4 -L(Unaligned64LeaveCase2): - xor %ecx, %ecx - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %edx - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm4) - - pcmpeqb %xmm5, %xmm0 - 
pmovmskb %xmm0, %edx - movdqu %xmm4, (%edi) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm5) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm5, 16(%edi) - add $16, %ecx - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %edx, %edx - jnz L(CopyFrom1To16BytesUnalignedXmm6) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %edx - movdqu %xmm6, 32(%edi) - lea 16(%edi, %ecx), %edi - lea 16(%esi, %ecx), %esi - bsf %edx, %edx - cmp %ebx, %edx - jb L(CopyFrom1To16BytesExit) - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) - - .p2align 4 -L(ExitZero): - movl %edi, %eax - RETURN - -END (STRCPY) - - .p2align 4 - .section .rodata -L(ExitTable): - .int JMPTBL(L(Exit1), L(ExitTable)) - .int JMPTBL(L(Exit2), L(ExitTable)) - .int JMPTBL(L(Exit3), L(ExitTable)) - .int JMPTBL(L(Exit4), L(ExitTable)) - .int JMPTBL(L(Exit5), L(ExitTable)) - .int JMPTBL(L(Exit6), L(ExitTable)) - .int JMPTBL(L(Exit7), L(ExitTable)) - .int JMPTBL(L(Exit8), L(ExitTable)) - .int JMPTBL(L(Exit9), L(ExitTable)) - .int JMPTBL(L(Exit10), L(ExitTable)) - .int JMPTBL(L(Exit11), L(ExitTable)) - .int JMPTBL(L(Exit12), L(ExitTable)) - .int JMPTBL(L(Exit13), L(ExitTable)) - .int JMPTBL(L(Exit14), L(ExitTable)) - .int JMPTBL(L(Exit15), L(ExitTable)) - .int JMPTBL(L(Exit16), L(ExitTable)) - .int JMPTBL(L(Exit17), L(ExitTable)) - .int JMPTBL(L(Exit18), L(ExitTable)) - .int JMPTBL(L(Exit19), L(ExitTable)) - .int JMPTBL(L(Exit20), L(ExitTable)) - .int JMPTBL(L(Exit21), L(ExitTable)) - .int JMPTBL(L(Exit22), L(ExitTable)) - .int JMPTBL(L(Exit23), L(ExitTable)) - .int JMPTBL(L(Exit24), L(ExitTable)) - .int JMPTBL(L(Exit25), L(ExitTable)) - .int JMPTBL(L(Exit26), L(ExitTable)) - .int JMPTBL(L(Exit27), L(ExitTable)) - .int JMPTBL(L(Exit28), L(ExitTable)) - .int JMPTBL(L(Exit29), L(ExitTable)) - .int JMPTBL(L(Exit30), L(ExitTable)) - .int JMPTBL(L(Exit31), L(ExitTable)) - .int JMPTBL(L(Exit32), L(ExitTable)) - 
-L(ExitStrncpyTable): - .int JMPTBL(L(Exit0), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable)) - .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable)) - - .p2align 4 -L(FillTable): - .int JMPTBL(L(Fill0), L(FillTable)) - .int JMPTBL(L(Fill1), L(FillTable)) - .int JMPTBL(L(Fill2), L(FillTable)) - .int JMPTBL(L(Fill3), L(FillTable)) - .int 
JMPTBL(L(Fill4), L(FillTable)) - .int JMPTBL(L(Fill5), L(FillTable)) - .int JMPTBL(L(Fill6), L(FillTable)) - .int JMPTBL(L(Fill7), L(FillTable)) - .int JMPTBL(L(Fill8), L(FillTable)) - .int JMPTBL(L(Fill9), L(FillTable)) - .int JMPTBL(L(Fill10), L(FillTable)) - .int JMPTBL(L(Fill11), L(FillTable)) - .int JMPTBL(L(Fill12), L(FillTable)) - .int JMPTBL(L(Fill13), L(FillTable)) - .int JMPTBL(L(Fill14), L(FillTable)) - .int JMPTBL(L(Fill15), L(FillTable)) - .int JMPTBL(L(Fill16), L(FillTable)) -# else -# define PARMS 4 -# define ENTRANCE -# define RETURN POP (%edi); ret; CFI_PUSH (%edi) -# define RETURN1 ret - - .text -ENTRY (STRCPY) - ENTRANCE - mov STR1(%esp), %edx - mov STR2(%esp), %ecx - - cmpb $0, (%ecx) - jz L(ExitTail1) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - cmpb $0, 3(%ecx) - jz L(ExitTail4) - cmpb $0, 4(%ecx) - jz L(ExitTail5) - cmpb $0, 5(%ecx) - jz L(ExitTail6) - cmpb $0, 6(%ecx) - jz L(ExitTail7) - cmpb $0, 7(%ecx) - jz L(ExitTail8) - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - cmpb $0, 11(%ecx) - jz L(ExitTail12) - cmpb $0, 12(%ecx) - jz L(ExitTail13) - cmpb $0, 13(%ecx) - jz L(ExitTail14) - cmpb $0, 14(%ecx) - jz L(ExitTail15) - cmpb $0, 15(%ecx) - jz L(ExitTail16) - - PUSH (%edi) - PUSH (%ebx) - - mov %edx, %edi - lea 16(%ecx), %ebx - and $-16, %ebx - pxor %xmm0, %xmm0 - movdqu (%ecx), %xmm1 - movdqu %xmm1, (%edx) - pcmpeqb (%ebx), %xmm0 - pmovmskb %xmm0, %eax - sub %ecx, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - mov %ecx, %eax - lea 16(%ecx), %ecx - and $-16, %ecx - sub %ecx, %eax - sub %eax, %edx - xor %ebx, %ebx - - .p2align 4 - movdqa (%ecx), %xmm1 - movaps 16(%ecx), %xmm2 - movdqu %xmm1, (%edx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm3 - movdqu %xmm2, (%edx, %ebx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test 
%eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm4 - movdqu %xmm3, (%edx, %ebx) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm1 - movdqu %xmm4, (%edx, %ebx) - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm2 - movdqu %xmm1, (%edx, %ebx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %ebx), %xmm3 - movdqu %xmm2, (%edx, %ebx) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - add $16, %ebx - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movdqu %xmm3, (%edx, %ebx) - mov %ecx, %eax - lea 16(%ecx, %ebx), %ecx - and $-0x40, %ecx - sub %ecx, %eax - sub %eax, %edx - -L(Aligned64Loop): - movaps (%ecx), %xmm2 - movaps %xmm2, %xmm4 - movaps 16(%ecx), %xmm5 - movaps 32(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 48(%ecx), %xmm7 - pminub %xmm5, %xmm2 - add $64, %ecx - pminub %xmm7, %xmm3 - add $64, %edx - pminub %xmm2, %xmm3 - pcmpeqb %xmm0, %xmm3 - pmovmskb %xmm3, %eax - test %eax, %eax - jnz L(Aligned64Leave) -L(Aligned64Loop_start): - movdqu %xmm4, -64(%edx) - movaps (%ecx), %xmm2 - movdqa %xmm2, %xmm4 - movdqu %xmm5, -48(%edx) - movaps 16(%ecx), %xmm5 - pminub %xmm5, %xmm2 - movaps 32(%ecx), %xmm3 - movdqu %xmm6, -32(%edx) - movaps %xmm3, %xmm6 - movdqu %xmm7, -16(%edx) - movaps 48(%ecx), %xmm7 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - add $64, %edx - add $64, %ecx - test %eax, %eax - jz L(Aligned64Loop_start) -L(Aligned64Leave): - sub $0xa0, %ebx - pxor %xmm0, %xmm0 - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %eax - movdqu %xmm4, -64(%edx) - test %eax, %eax - lea 16(%ebx), %ebx - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %eax - 
movdqu %xmm5, -48(%edx) - test %eax, %eax - lea 16(%ebx), %ebx - jnz L(CopyFrom1To16Bytes) - - movdqu %xmm6, -32(%edx) - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%ebx), %ebx - -/*-----------------End of main part---------------------------*/ - - .p2align 4 -L(CopyFrom1To16Bytes): - add %ebx, %edx - add %ebx, %ecx - - POP (%ebx) - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) - test $0x08, %al - jnz L(Exit4) - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - /* Exit 8 */ - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 7(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(ExitHigh): - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - test $0x08, %ah - jnz L(Exit12) - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - /* Exit 16 */ - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm0 - movlpd %xmm0, 8(%edx) -# ifdef USE_AS_STPCPY - lea 15(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit1): - movb (%ecx), %al - movb %al, (%edx) -# ifdef USE_AS_STPCPY - lea (%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit2): - movw (%ecx), %ax - movw %ax, (%edx) -# ifdef USE_AS_STPCPY - lea 1(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) -# ifdef USE_AS_STPCPY - lea 2(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) -# ifdef USE_AS_STPCPY - lea 3(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 
4(%edx) -# ifdef USE_AS_STPCPY - lea 4(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 5(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) -# ifdef USE_AS_STPCPY - lea 6(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit9): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movb 8(%ecx), %al - movb %al, 8(%edx) -# ifdef USE_AS_STPCPY - lea 8(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit10): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movw 8(%ecx), %ax - movw %ax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 9(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit11): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 7(%ecx), %eax - movl %eax, 7(%edx) -# ifdef USE_AS_STPCPY - lea 10(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit12): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 11(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit13): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 5(%ecx), %xmm0 - movlpd %xmm0, 5(%edx) -# ifdef USE_AS_STPCPY - lea 12(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit14): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 6(%ecx), %xmm0 - movlpd %xmm0, 6(%edx) -# ifdef USE_AS_STPCPY - lea 13(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - - .p2align 4 -L(Exit15): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) -# ifdef USE_AS_STPCPY - lea 
14(%edx), %eax -# else - movl %edi, %eax -# endif - RETURN - -CFI_POP (%edi) - - .p2align 4 -L(ExitTail1): - movb (%ecx), %al - movb %al, (%edx) - movl %edx, %eax - RETURN1 - - .p2align 4 -L(ExitTail2): - movw (%ecx), %ax - movw %ax, (%edx) -# ifdef USE_AS_STPCPY - lea 1(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) -# ifdef USE_AS_STPCPY - lea 2(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail4): - movl (%ecx), %eax - movl %eax, (%edx) -# ifdef USE_AS_STPCPY - lea 3(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) -# ifdef USE_AS_STPCPY - lea 4(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 5(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) -# ifdef USE_AS_STPCPY - lea 6(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail8): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) -# ifdef USE_AS_STPCPY - lea 7(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail9): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movb 8(%ecx), %al - movb %al, 8(%edx) -# ifdef USE_AS_STPCPY - lea 8(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail10): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movw 8(%ecx), %ax - movw %ax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 9(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail11): - movl 
(%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 7(%ecx), %eax - movl %eax, 7(%edx) -# ifdef USE_AS_STPCPY - lea 10(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail12): - movl (%ecx), %eax - movl %eax, (%edx) - movl 4(%ecx), %eax - movl %eax, 4(%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) -# ifdef USE_AS_STPCPY - lea 11(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail13): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 5(%ecx), %xmm0 - movlpd %xmm0, 5(%edx) -# ifdef USE_AS_STPCPY - lea 12(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail14): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 6(%ecx), %xmm0 - movlpd %xmm0, 6(%edx) -# ifdef USE_AS_STPCPY - lea 13(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail15): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) -# ifdef USE_AS_STPCPY - lea 14(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - - .p2align 4 -L(ExitTail16): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 8(%ecx), %xmm0 - movlpd %xmm0, 8(%edx) -# ifdef USE_AS_STPCPY - lea 15(%edx), %eax -# else - movl %edx, %eax -# endif - RETURN1 - -END (STRCPY) -# endif - -#endif diff --git a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/sysdeps/i386/i686/multiarch/strcpy-ssse3.S deleted file mode 100644 index effd85da94..0000000000 --- a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S +++ /dev/null @@ -1,3901 +0,0 @@ -/* strcpy with SSSE3 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - - -#if IS_IN (libc) - -# ifndef USE_AS_STRCAT -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# ifndef STRCPY -# define STRCPY __strcpy_ssse3 -# endif - -# ifdef USE_AS_STRNCPY -# define PARMS 8 -# define ENTRANCE PUSH (%ebx) -# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx); -# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi) -# else -# define PARMS 4 -# define ENTRANCE -# define RETURN ret -# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi) -# endif - -# ifdef USE_AS_STPCPY -# define SAVE_RESULT(n) lea n(%edx), %eax -# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax -# else -# define SAVE_RESULT(n) movl %edi, %eax -# define SAVE_RESULT_TAIL(n) movl %edx, %eax -# endif - -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - -/* In this code the following instructions are used for copying: - movb - 1 byte - movw - 2 bytes - movl - 4 bytes - movlpd - 8 bytes - movaps - 16 bytes - requires 16-byte alignment - of source and destination addresses. 
-*/ - -.text -ENTRY (STRCPY) - ENTRANCE - mov STR1(%esp), %edx - mov STR2(%esp), %ecx -# ifdef USE_AS_STRNCPY - movl LEN(%esp), %ebx - cmp $8, %ebx - jbe L(StrncpyExit8Bytes) -# endif - cmpb $0, (%ecx) - jz L(ExitTail1) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - cmpb $0, 3(%ecx) - jz L(ExitTail4) - cmpb $0, 4(%ecx) - jz L(ExitTail5) - cmpb $0, 5(%ecx) - jz L(ExitTail6) - cmpb $0, 6(%ecx) - jz L(ExitTail7) - cmpb $0, 7(%ecx) - jz L(ExitTail8) -# ifdef USE_AS_STRNCPY - cmp $16, %ebx - jb L(StrncpyExit15Bytes) -# endif - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - cmpb $0, 11(%ecx) - jz L(ExitTail12) - cmpb $0, 12(%ecx) - jz L(ExitTail13) - cmpb $0, 13(%ecx) - jz L(ExitTail14) - cmpb $0, 14(%ecx) - jz L(ExitTail15) -# ifdef USE_AS_STRNCPY - cmp $16, %ebx - je L(ExitTail16) -# endif - cmpb $0, 15(%ecx) - jz L(ExitTail16) - - PUSH (%edi) - mov %edx, %edi -# endif - PUSH (%esi) -# ifdef USE_AS_STRNCPY - mov %ecx, %esi - sub $16, %ebx - and $0xf, %esi - -/* add 16 bytes ecx_offset to ebx */ - - add %esi, %ebx -# endif - lea 16(%ecx), %esi - and $-16, %esi - pxor %xmm0, %xmm0 - movlpd (%ecx), %xmm1 - movlpd %xmm1, (%edx) - - pcmpeqb (%esi), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm1, 8(%edx) - - pmovmskb %xmm0, %eax - sub %ecx, %esi - -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - mov %edx, %eax - lea 16(%edx), %edx - and $-16, %edx - sub %edx, %eax - -# ifdef USE_AS_STRNCPY - add %eax, %esi - lea -1(%esi), %esi - and $1<<31, %esi - test %esi, %esi - jnz L(ContinueCopy) - lea 16(%ebx), %ebx - -L(ContinueCopy): -# endif - sub %eax, %ecx - mov %ecx, %eax - and $0xf, %eax - mov $0, %esi - -/* case: ecx_offset == edx_offset */ - - jz L(Align16Both) - - cmp $8, %eax - jae L(ShlHigh8) - cmp $1, %eax - je L(Shl1) - cmp $2, %eax - je L(Shl2) - cmp $3, %eax - je L(Shl3) - cmp $4, %eax - 
je L(Shl4) - cmp $5, %eax - je L(Shl5) - cmp $6, %eax - je L(Shl6) - jmp L(Shl7) - -L(ShlHigh8): - je L(Shl8) - cmp $9, %eax - je L(Shl9) - cmp $10, %eax - je L(Shl10) - cmp $11, %eax - je L(Shl11) - cmp $12, %eax - je L(Shl12) - cmp $13, %eax - je L(Shl13) - cmp $14, %eax - je L(Shl14) - jmp L(Shl15) - -L(Align16Both): - movaps (%ecx), %xmm1 - movaps 16(%ecx), %xmm2 - movaps %xmm1, (%edx) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm4 - movaps %xmm3, (%edx, %esi) - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm1 - movaps %xmm4, (%edx, %esi) - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm2 - movaps %xmm1, (%edx, %esi) - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqb %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) -# endif - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps %xmm3, (%edx, %esi) - mov %ecx, %eax - lea 16(%ecx, %esi), %ecx - and 
$-0x40, %ecx - sub %ecx, %eax - sub %eax, %edx -# ifdef USE_AS_STRNCPY - lea 112(%ebx, %eax), %ebx -# endif - mov $-0x40, %esi - -L(Aligned64Loop): - movaps (%ecx), %xmm2 - movaps 32(%ecx), %xmm3 - movaps %xmm2, %xmm4 - movaps 16(%ecx), %xmm5 - movaps %xmm3, %xmm6 - movaps 48(%ecx), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - lea 64(%edx), %edx - pcmpeqb %xmm0, %xmm3 - lea 64(%ecx), %ecx - pmovmskb %xmm3, %eax -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeaveCase2OrCase3) -# endif - test %eax, %eax - jnz L(Aligned64Leave) - movaps %xmm4, -64(%edx) - movaps %xmm5, -48(%edx) - movaps %xmm6, -32(%edx) - movaps %xmm7, -16(%edx) - jmp L(Aligned64Loop) - -L(Aligned64Leave): -# ifdef USE_AS_STRNCPY - lea 48(%ebx), %ebx -# endif - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 -# ifdef USE_AS_STRNCPY - lea -16(%ebx), %ebx -# endif - pmovmskb %xmm0, %eax - movaps %xmm4, -64(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 -# ifdef USE_AS_STRNCPY - lea -16(%ebx), %ebx -# endif - pmovmskb %xmm0, %eax - movaps %xmm5, -48(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - movaps %xmm6, -32(%edx) - pcmpeqb %xmm7, %xmm0 -# ifdef USE_AS_STRNCPY - lea -16(%ebx), %ebx -# endif - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl1): - movaps -1(%ecx), %xmm1 - movaps 15(%ecx), %xmm2 -L(Shl1Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) 
-# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit1Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl1LoopExit) - - palignr $1, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 31(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -15(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -1(%ecx), %xmm1 - -L(Shl1LoopStart): - movaps 15(%ecx), %xmm2 - movaps 31(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 47(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 63(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $1, %xmm4, %xmm5 - test %eax, %eax - palignr $1, %xmm3, %xmm4 - jnz L(Shl1Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave1) -# endif - palignr $1, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $1, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl1LoopStart) - -L(Shl1LoopExit): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movlpd 7(%ecx), %xmm0 - movlpd %xmm0, 7(%edx) - mov $15, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl2): - movaps -2(%ecx), %xmm1 - movaps 14(%ecx), %xmm2 -L(Shl2Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe 
L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit2Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl2LoopExit) - - palignr $2, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 30(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -14(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -2(%ecx), %xmm1 - -L(Shl2LoopStart): - movaps 14(%ecx), %xmm2 - movaps 30(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 46(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 62(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $2, %xmm4, %xmm5 - test %eax, %eax - palignr $2, %xmm3, %xmm4 - jnz L(Shl2Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave2) -# endif - palignr $2, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $2, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl2LoopStart) 
- -L(Shl2LoopExit): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 6(%edx) - mov $14, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl3): - movaps -3(%ecx), %xmm1 - movaps 13(%ecx), %xmm2 -L(Shl3Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit3Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl3LoopExit) - - palignr $3, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 29(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -13(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -3(%ecx), %xmm1 - -L(Shl3LoopStart): - movaps 13(%ecx), %xmm2 - movaps 29(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 45(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 61(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $3, %xmm4, %xmm5 - test %eax, 
%eax - palignr $3, %xmm3, %xmm4 - jnz L(Shl3Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave3) -# endif - palignr $3, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $3, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl3LoopStart) - -L(Shl3LoopExit): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - mov $13, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl4): - movaps -4(%ecx), %xmm1 - movaps 12(%ecx), %xmm2 -L(Shl4Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit4Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 28(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -12(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - 
movaps -4(%ecx), %xmm1 - -L(Shl4LoopStart): - movaps 12(%ecx), %xmm2 - movaps 28(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 44(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 60(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $4, %xmm4, %xmm5 - test %eax, %eax - palignr $4, %xmm3, %xmm4 - jnz L(Shl4Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave4) -# endif - palignr $4, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $4, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl4LoopStart) - -L(Shl4LoopExit): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 8(%edx) - mov $12, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl5): - movaps -5(%ecx), %xmm1 - movaps 11(%ecx), %xmm2 -L(Shl5Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# 
ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit5Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl5LoopExit) - - palignr $5, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 27(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -11(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -5(%ecx), %xmm1 - -L(Shl5LoopStart): - movaps 11(%ecx), %xmm2 - movaps 27(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 43(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 59(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $5, %xmm4, %xmm5 - test %eax, %eax - palignr $5, %xmm3, %xmm4 - jnz L(Shl5Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave5) -# endif - palignr $5, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $5, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl5LoopStart) - -L(Shl5LoopExit): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 7(%edx) - mov $11, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl6): - movaps -6(%ecx), %xmm1 - movaps 10(%ecx), %xmm2 -L(Shl6Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - 
pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit6Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl6LoopExit) - - palignr $6, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 26(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -10(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -6(%ecx), %xmm1 - -L(Shl6LoopStart): - movaps 10(%ecx), %xmm2 - movaps 26(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 42(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 58(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $6, %xmm4, %xmm5 - test %eax, %eax - palignr $6, %xmm3, %xmm4 - jnz L(Shl6Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave6) -# endif - palignr $6, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $6, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl6LoopStart) - -L(Shl6LoopExit): - movlpd (%ecx), %xmm0 - movl 6(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 6(%edx) - mov $10, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl7): - movaps -7(%ecx), %xmm1 - movaps 9(%ecx), %xmm2 -L(Shl7Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - 
lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit7Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl7LoopExit) - - palignr $7, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 25(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -9(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -7(%ecx), %xmm1 - -L(Shl7LoopStart): - movaps 9(%ecx), %xmm2 - movaps 25(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 41(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 57(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $7, %xmm4, %xmm5 - test %eax, %eax - palignr $7, %xmm3, %xmm4 - jnz L(Shl7Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave7) -# endif - palignr $7, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $7, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl7LoopStart) - -L(Shl7LoopExit): - movlpd (%ecx), %xmm0 - movl 5(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 5(%edx) - mov $9, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl8): - movaps -8(%ecx), %xmm1 - 
movaps 8(%ecx), %xmm2 -L(Shl8Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit8Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 24(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -8(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -8(%ecx), %xmm1 - -L(Shl8LoopStart): - movaps 8(%ecx), %xmm2 - movaps 24(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 40(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 56(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $8, %xmm4, %xmm5 - test %eax, %eax - palignr $8, %xmm3, %xmm4 - jnz L(Shl8Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave8) -# endif - palignr $8, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $8, %xmm1, %xmm2 - movaps %xmm7, 
%xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl8LoopStart) - -L(Shl8LoopExit): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - mov $8, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl9): - movaps -9(%ecx), %xmm1 - movaps 7(%ecx), %xmm2 -L(Shl9Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit9Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl9LoopExit) - - palignr $9, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 23(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -7(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -9(%ecx), %xmm1 - -L(Shl9LoopStart): - movaps 7(%ecx), %xmm2 - movaps 23(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 39(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 55(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb 
%xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $9, %xmm4, %xmm5 - test %eax, %eax - palignr $9, %xmm3, %xmm4 - jnz L(Shl9Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave9) -# endif - palignr $9, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $9, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl9LoopStart) - -L(Shl9LoopExit): - movlpd -1(%ecx), %xmm0 - movlpd %xmm0, -1(%edx) - mov $7, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl10): - movaps -10(%ecx), %xmm1 - movaps 6(%ecx), %xmm2 -L(Shl10Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit10Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl10LoopExit) - - palignr $10, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 22(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -6(%ecx), %ecx - 
sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -10(%ecx), %xmm1 - -L(Shl10LoopStart): - movaps 6(%ecx), %xmm2 - movaps 22(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 38(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 54(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $10, %xmm4, %xmm5 - test %eax, %eax - palignr $10, %xmm3, %xmm4 - jnz L(Shl10Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave10) -# endif - palignr $10, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $10, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl10LoopStart) - -L(Shl10LoopExit): - movlpd -2(%ecx), %xmm0 - movlpd %xmm0, -2(%edx) - mov $6, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl11): - movaps -11(%ecx), %xmm1 - movaps 5(%ecx), %xmm2 -L(Shl11Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - 
pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit11Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl11LoopExit) - - palignr $11, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 21(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -5(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -11(%ecx), %xmm1 - -L(Shl11LoopStart): - movaps 5(%ecx), %xmm2 - movaps 21(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 37(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 53(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $11, %xmm4, %xmm5 - test %eax, %eax - palignr $11, %xmm3, %xmm4 - jnz L(Shl11Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave11) -# endif - palignr $11, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $11, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl11LoopStart) - -L(Shl11LoopExit): - movlpd -3(%ecx), %xmm0 - movlpd %xmm0, -3(%edx) - mov $5, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl12): - movaps -12(%ecx), %xmm1 - movaps 4(%ecx), %xmm2 -L(Shl12Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 
- lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit12Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 20(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -4(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -12(%ecx), %xmm1 - -L(Shl12LoopStart): - movaps 4(%ecx), %xmm2 - movaps 20(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 36(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 52(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $12, %xmm4, %xmm5 - test %eax, %eax - palignr $12, %xmm3, %xmm4 - jnz L(Shl12Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave12) -# endif - palignr $12, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $12, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl12LoopStart) - -L(Shl12LoopExit): - movl (%ecx), %esi - movl %esi, (%edx) - mov $4, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl13): - movaps -13(%ecx), %xmm1 - movaps 3(%ecx), %xmm2 -L(Shl13Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 
16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit13Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl13LoopExit) - - palignr $13, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 19(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -3(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -13(%ecx), %xmm1 - -L(Shl13LoopStart): - movaps 3(%ecx), %xmm2 - movaps 19(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 35(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 51(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $13, %xmm4, %xmm5 - test %eax, %eax - palignr $13, %xmm3, %xmm4 - jnz L(Shl13Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave13) -# endif - palignr $13, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $13, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl13LoopStart) - -L(Shl13LoopExit): - movl -1(%ecx), %esi - movl %esi, -1(%edx) - mov $3, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl14): - movaps -14(%ecx), %xmm1 - movaps 2(%ecx), %xmm2 
-L(Shl14Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit14Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl14LoopExit) - - palignr $14, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 18(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -2(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -14(%ecx), %xmm1 - -L(Shl14LoopStart): - movaps 2(%ecx), %xmm2 - movaps 18(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 34(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 50(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $14, %xmm4, %xmm5 - test %eax, %eax - palignr $14, %xmm3, %xmm4 - jnz L(Shl14Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave14) -# endif - palignr $14, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $14, %xmm1, %xmm2 - movaps %xmm7, 
%xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl14LoopStart) - -L(Shl14LoopExit): - movl -2(%ecx), %esi - movl %esi, -2(%edx) - mov $2, %esi - jmp L(CopyFrom1To16Bytes) - - .p2align 4 -L(Shl15): - movaps -15(%ecx), %xmm1 - movaps 1(%ecx), %xmm2 -L(Shl15Start): - pcmpeqb %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm1, %xmm2 - movaps %xmm3, %xmm1 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - movaps %xmm3, %xmm1 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - - pcmpeqb %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx -# ifdef USE_AS_STRNCPY - sub $16, %ebx - jbe L(StrncpyExit15Case2OrCase3) -# endif - test %eax, %eax - jnz L(Shl15LoopExit) - - palignr $15, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 17(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -1(%ecx), %ecx - sub %eax, %edx -# ifdef USE_AS_STRNCPY - add %eax, %ebx -# endif - movaps -15(%ecx), %xmm1 - -L(Shl15LoopStart): - movaps 1(%ecx), %xmm2 - movaps 17(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 33(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 49(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, 
%xmm7 - pcmpeqb %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $15, %xmm4, %xmm5 - test %eax, %eax - palignr $15, %xmm3, %xmm4 - jnz L(Shl15Start) -# ifdef USE_AS_STRNCPY - sub $64, %ebx - jbe L(StrncpyLeave15) -# endif - palignr $15, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $15, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl15LoopStart) - -L(Shl15LoopExit): - movl -3(%ecx), %esi - movl %esi, -3(%edx) - mov $1, %esi -# ifdef USE_AS_STRCAT - jmp L(CopyFrom1To16Bytes) -# endif - - -# ifndef USE_AS_STRCAT - - .p2align 4 -L(CopyFrom1To16Bytes): -# ifdef USE_AS_STRNCPY - add $16, %ebx -# endif - add %esi, %edx - add %esi, %ecx - - POP (%esi) - test %al, %al - jz L(ExitHigh8) - -L(CopyFrom1To16BytesLess8): - mov %al, %ah - and $15, %ah - jz L(ExitHigh4) - - test $0x01, %al - jnz L(Exit1) - test $0x02, %al - jnz L(Exit2) - test $0x04, %al - jnz L(Exit3) - - .p2align 4 -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT (3) -# ifdef USE_AS_STRNCPY - sub $4, %ebx - lea 4(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(ExitHigh4): - test $0x10, %al - jnz L(Exit5) - test $0x20, %al - jnz L(Exit6) - test $0x40, %al - jnz L(Exit7) - - .p2align 4 -L(Exit8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - SAVE_RESULT (7) -# ifdef USE_AS_STRNCPY - sub $8, %ebx - lea 8(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(ExitHigh8): - mov %ah, %al - and $15, %al - jz L(ExitHigh12) - - test $0x01, %ah - jnz L(Exit9) - test $0x02, %ah - jnz L(Exit10) - test $0x04, %ah - jnz L(Exit11) - - .p2align 4 -L(Exit12): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT (11) -# ifdef 
USE_AS_STRNCPY - sub $12, %ebx - lea 12(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(ExitHigh12): - test $0x10, %ah - jnz L(Exit13) - test $0x20, %ah - jnz L(Exit14) - test $0x40, %ah - jnz L(Exit15) - - .p2align 4 -L(Exit16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - SAVE_RESULT (15) -# ifdef USE_AS_STRNCPY - sub $16, %ebx - lea 16(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - -# ifdef USE_AS_STRNCPY - - CFI_PUSH(%esi) - - .p2align 4 -L(CopyFrom1To16BytesCase2): - add $16, %ebx - add %esi, %ecx - add %esi, %edx - - POP (%esi) - - test %al, %al - jz L(ExitHighCase2) - - cmp $8, %ebx - ja L(CopyFrom1To16BytesLess8) - - test $0x01, %al - jnz L(Exit1) - cmp $1, %ebx - je L(Exit1) - test $0x02, %al - jnz L(Exit2) - cmp $2, %ebx - je L(Exit2) - test $0x04, %al - jnz L(Exit3) - cmp $3, %ebx - je L(Exit3) - test $0x08, %al - jnz L(Exit4) - cmp $4, %ebx - je L(Exit4) - test $0x10, %al - jnz L(Exit5) - cmp $5, %ebx - je L(Exit5) - test $0x20, %al - jnz L(Exit6) - cmp $6, %ebx - je L(Exit6) - test $0x40, %al - jnz L(Exit7) - cmp $7, %ebx - je L(Exit7) - jmp L(Exit8) - - .p2align 4 -L(ExitHighCase2): - cmp $8, %ebx - jbe L(CopyFrom1To16BytesLess8Case3) - - test $0x01, %ah - jnz L(Exit9) - cmp $9, %ebx - je L(Exit9) - test $0x02, %ah - jnz L(Exit10) - cmp $10, %ebx - je L(Exit10) - test $0x04, %ah - jnz L(Exit11) - cmp $11, %ebx - je L(Exit11) - test $0x8, %ah - jnz L(Exit12) - cmp $12, %ebx - je L(Exit12) - test $0x10, %ah - jnz L(Exit13) - cmp $13, %ebx - je L(Exit13) - test $0x20, %ah - jnz L(Exit14) - cmp $14, %ebx - je L(Exit14) - test $0x40, %ah - jnz L(Exit15) - cmp $15, %ebx - je L(Exit15) - jmp L(Exit16) - - CFI_PUSH(%esi) - - .p2align 4 -L(CopyFrom1To16BytesCase2OrCase3): - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - - .p2align 4 -L(CopyFrom1To16BytesCase3): - add 
$16, %ebx - add %esi, %edx - add %esi, %ecx - - POP (%esi) - - cmp $8, %ebx - ja L(ExitHigh8Case3) - -L(CopyFrom1To16BytesLess8Case3): - cmp $4, %ebx - ja L(ExitHigh4Case3) - - cmp $1, %ebx - je L(Exit1) - cmp $2, %ebx - je L(Exit2) - cmp $3, %ebx - je L(Exit3) - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT (4) - RETURN1 - - .p2align 4 -L(ExitHigh4Case3): - cmp $5, %ebx - je L(Exit5) - cmp $6, %ebx - je L(Exit6) - cmp $7, %ebx - je L(Exit7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - SAVE_RESULT (8) - RETURN1 - - .p2align 4 -L(ExitHigh8Case3): - cmp $12, %ebx - ja L(ExitHigh12Case3) - - cmp $9, %ebx - je L(Exit9) - cmp $10, %ebx - je L(Exit10) - cmp $11, %ebx - je L(Exit11) - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT (12) - RETURN1 - - .p2align 4 -L(ExitHigh12Case3): - cmp $13, %ebx - je L(Exit13) - cmp $14, %ebx - je L(Exit14) - cmp $15, %ebx - je L(Exit15) - movlpd (%ecx), %xmm0 - movlpd 8(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 8(%edx) - SAVE_RESULT (16) - RETURN1 - -# endif - - .p2align 4 -L(Exit1): - movb (%ecx), %al - movb %al, (%edx) - SAVE_RESULT (0) -# ifdef USE_AS_STRNCPY - sub $1, %ebx - lea 1(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit2): - movw (%ecx), %ax - movw %ax, (%edx) - SAVE_RESULT (1) -# ifdef USE_AS_STRNCPY - sub $2, %ebx - lea 2(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) - SAVE_RESULT (2) -# ifdef USE_AS_STRNCPY - sub $3, %ebx - lea 3(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) 
- SAVE_RESULT (4) -# ifdef USE_AS_STRNCPY - sub $5, %ebx - lea 5(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) - SAVE_RESULT (5) -# ifdef USE_AS_STRNCPY - sub $6, %ebx - lea 6(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) - SAVE_RESULT (6) -# ifdef USE_AS_STRNCPY - sub $7, %ebx - lea 7(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit9): - movlpd (%ecx), %xmm0 - movb 8(%ecx), %al - movlpd %xmm0, (%edx) - movb %al, 8(%edx) - SAVE_RESULT (8) -# ifdef USE_AS_STRNCPY - sub $9, %ebx - lea 9(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit10): - movlpd (%ecx), %xmm0 - movw 8(%ecx), %ax - movlpd %xmm0, (%edx) - movw %ax, 8(%edx) - SAVE_RESULT (9) -# ifdef USE_AS_STRNCPY - sub $10, %ebx - lea 10(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit11): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 7(%edx) - SAVE_RESULT (10) -# ifdef USE_AS_STRNCPY - sub $11, %ebx - lea 11(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit13): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - SAVE_RESULT (12) -# ifdef USE_AS_STRNCPY - sub $13, %ebx - lea 13(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb 
$1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit14): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 6(%edx) - SAVE_RESULT (13) -# ifdef USE_AS_STRNCPY - sub $14, %ebx - lea 14(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - - .p2align 4 -L(Exit15): - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) - SAVE_RESULT (14) -# ifdef USE_AS_STRNCPY - sub $15, %ebx - lea 15(%edx), %ecx - jnz L(StrncpyFillTailWithZero1) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN1 - -CFI_POP (%edi) - -# ifdef USE_AS_STRNCPY - .p2align 4 -L(Fill0): - RETURN - - .p2align 4 -L(Fill1): - movb %dl, (%ecx) - RETURN - - .p2align 4 -L(Fill2): - movw %dx, (%ecx) - RETURN - - .p2align 4 -L(Fill3): - movw %dx, (%ecx) - movb %dl, 2(%ecx) - RETURN - - .p2align 4 -L(Fill4): - movl %edx, (%ecx) - RETURN - - .p2align 4 -L(Fill5): - movl %edx, (%ecx) - movb %dl, 4(%ecx) - RETURN - - .p2align 4 -L(Fill6): - movl %edx, (%ecx) - movw %dx, 4(%ecx) - RETURN - - .p2align 4 -L(Fill7): - movl %edx, (%ecx) - movl %edx, 3(%ecx) - RETURN - - .p2align 4 -L(Fill8): - movlpd %xmm0, (%ecx) - RETURN - - .p2align 4 -L(Fill9): - movlpd %xmm0, (%ecx) - movb %dl, 8(%ecx) - RETURN - - .p2align 4 -L(Fill10): - movlpd %xmm0, (%ecx) - movw %dx, 8(%ecx) - RETURN - - .p2align 4 -L(Fill11): - movlpd %xmm0, (%ecx) - movl %edx, 7(%ecx) - RETURN - - .p2align 4 -L(Fill12): - movlpd %xmm0, (%ecx) - movl %edx, 8(%ecx) - RETURN - - .p2align 4 -L(Fill13): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 5(%ecx) - RETURN - - .p2align 4 -L(Fill14): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 6(%ecx) - RETURN - - .p2align 4 -L(Fill15): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 7(%ecx) - RETURN - - .p2align 4 -L(Fill16): - movlpd %xmm0, (%ecx) - movlpd %xmm0, 8(%ecx) - RETURN - - .p2align 4 -L(StrncpyFillExit1): - lea 16(%ebx), %ebx 
-L(FillFrom1To16Bytes): - test %ebx, %ebx - jz L(Fill0) - cmp $16, %ebx - je L(Fill16) - cmp $8, %ebx - je L(Fill8) - jg L(FillMore8) - cmp $4, %ebx - je L(Fill4) - jg L(FillMore4) - cmp $2, %ebx - jl L(Fill1) - je L(Fill2) - jg L(Fill3) -L(FillMore8): /* but less than 16 */ - cmp $12, %ebx - je L(Fill12) - jl L(FillLess12) - cmp $14, %ebx - jl L(Fill13) - je L(Fill14) - jg L(Fill15) -L(FillMore4): /* but less than 8 */ - cmp $6, %ebx - jl L(Fill5) - je L(Fill6) - jg L(Fill7) -L(FillLess12): /* but more than 8 */ - cmp $10, %ebx - jl L(Fill9) - je L(Fill10) - jmp L(Fill11) - - CFI_PUSH(%edi) - - .p2align 4 -L(StrncpyFillTailWithZero1): - POP (%edi) -L(StrncpyFillTailWithZero): - pxor %xmm0, %xmm0 - xor %edx, %edx - sub $16, %ebx - jbe L(StrncpyFillExit1) - - movlpd %xmm0, (%ecx) - movlpd %xmm0, 8(%ecx) - - lea 16(%ecx), %ecx - - mov %ecx, %edx - and $0xf, %edx - sub %edx, %ecx - add %edx, %ebx - xor %edx, %edx - sub $64, %ebx - jb L(StrncpyFillLess64) - -L(StrncpyFillLoopMovdqa): - movdqa %xmm0, (%ecx) - movdqa %xmm0, 16(%ecx) - movdqa %xmm0, 32(%ecx) - movdqa %xmm0, 48(%ecx) - lea 64(%ecx), %ecx - sub $64, %ebx - jae L(StrncpyFillLoopMovdqa) - -L(StrncpyFillLess64): - add $32, %ebx - jl L(StrncpyFillLess32) - movdqa %xmm0, (%ecx) - movdqa %xmm0, 16(%ecx) - lea 32(%ecx), %ecx - sub $16, %ebx - jl L(StrncpyFillExit1) - movdqa %xmm0, (%ecx) - lea 16(%ecx), %ecx - jmp L(FillFrom1To16Bytes) - -L(StrncpyFillLess32): - add $16, %ebx - jl L(StrncpyFillExit1) - movdqa %xmm0, (%ecx) - lea 16(%ecx), %ecx - jmp L(FillFrom1To16Bytes) -# endif - - .p2align 4 -L(ExitTail1): - movb (%ecx), %al - movb %al, (%edx) - SAVE_RESULT_TAIL (0) -# ifdef USE_AS_STRNCPY - sub $1, %ebx - lea 1(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail2): - movw (%ecx), %ax - movw %ax, (%edx) - SAVE_RESULT_TAIL (1) -# ifdef USE_AS_STRNCPY - sub $2, %ebx - lea 2(%edx), %ecx - jnz 
L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail3): - movw (%ecx), %ax - movw %ax, (%edx) - movb 2(%ecx), %al - movb %al, 2(%edx) - SAVE_RESULT_TAIL (2) -# ifdef USE_AS_STRNCPY - sub $3, %ebx - lea 3(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail4): - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT_TAIL (3) -# ifdef USE_AS_STRNCPY - sub $4, %ebx - lea 4(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail5): - movl (%ecx), %eax - movl %eax, (%edx) - movb 4(%ecx), %al - movb %al, 4(%edx) - SAVE_RESULT_TAIL (4) -# ifdef USE_AS_STRNCPY - sub $5, %ebx - lea 5(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail6): - movl (%ecx), %eax - movl %eax, (%edx) - movw 4(%ecx), %ax - movw %ax, 4(%edx) - SAVE_RESULT_TAIL (5) -# ifdef USE_AS_STRNCPY - sub $6, %ebx - lea 6(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail7): - movl (%ecx), %eax - movl %eax, (%edx) - movl 3(%ecx), %eax - movl %eax, 3(%edx) - SAVE_RESULT_TAIL (6) -# ifdef USE_AS_STRNCPY - sub $7, %ebx - lea 7(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - SAVE_RESULT_TAIL (7) -# ifdef USE_AS_STRNCPY - sub $8, %ebx - lea 8(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# endif - RETURN - - .p2align 4 -L(ExitTail9): - movlpd (%ecx), %xmm0 - movb 8(%ecx), %al - movlpd %xmm0, (%edx) - movb %al, 8(%edx) - SAVE_RESULT_TAIL (8) -# ifdef USE_AS_STRNCPY - sub $9, 
%ebx - lea 9(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail10): - movlpd (%ecx), %xmm0 - movw 8(%ecx), %ax - movlpd %xmm0, (%edx) - movw %ax, 8(%edx) - SAVE_RESULT_TAIL (9) -# ifdef USE_AS_STRNCPY - sub $10, %ebx - lea 10(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail11): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 7(%edx) - SAVE_RESULT_TAIL (10) -# ifdef USE_AS_STRNCPY - sub $11, %ebx - lea 11(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail12): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT_TAIL (11) -# ifdef USE_AS_STRNCPY - sub $12, %ebx - lea 12(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail13): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - SAVE_RESULT_TAIL (12) -# ifdef USE_AS_STRNCPY - sub $13, %ebx - lea 13(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail14): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 6(%edx) - SAVE_RESULT_TAIL (13) -# ifdef USE_AS_STRNCPY - sub $14, %ebx - lea 14(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN - - .p2align 4 -L(ExitTail15): - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) - SAVE_RESULT_TAIL (14) -# ifdef USE_AS_STRNCPY - sub $15, %ebx - lea 15(%edx), %ecx - jnz L(StrncpyFillTailWithZero) 
-# endif - RETURN - - .p2align 4 -L(ExitTail16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - SAVE_RESULT_TAIL (15) -# ifdef USE_AS_STRNCPY - sub $16, %ebx - lea 16(%edx), %ecx - jnz L(StrncpyFillTailWithZero) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif -# endif - RETURN -# endif - -# ifdef USE_AS_STRNCPY -# ifndef USE_AS_STRCAT - CFI_PUSH (%esi) - CFI_PUSH (%edi) -# endif - .p2align 4 -L(StrncpyLeaveCase2OrCase3): - test %eax, %eax - jnz L(Aligned64LeaveCase2) - -L(Aligned64LeaveCase3): - add $48, %ebx - jle L(CopyFrom1To16BytesCase3) - movaps %xmm4, -64(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase3) - movaps %xmm5, -48(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase3) - movaps %xmm6, -32(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx - jmp L(CopyFrom1To16BytesCase3) - -L(Aligned64LeaveCase2): - pcmpeqb %xmm4, %xmm0 - pmovmskb %xmm0, %eax - add $48, %ebx - jle L(CopyFrom1To16BytesCase2OrCase3) - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm5, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm4, -64(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm6, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm5, -48(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(CopyFrom1To16BytesCase2OrCase3) - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqb %xmm7, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm6, -32(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx - jmp L(CopyFrom1To16BytesCase2) - -/*--------------------------------------------------*/ - .p2align 4 -L(StrncpyExit1Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) - mov $15, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit2Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd 6(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd 
%xmm1, 6(%edx) - mov $14, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit3Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd 5(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 5(%edx) - mov $13, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit4Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 8(%edx) - mov $12, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit5Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 7(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 7(%edx) - mov $11, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit6Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 6(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 6(%edx) - mov $10, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit7Case2OrCase3): - movlpd (%ecx), %xmm0 - movl 5(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 5(%edx) - mov $9, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit8Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - mov $8, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit9Case2OrCase3): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - mov $7, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit10Case2OrCase3): - movlpd -1(%ecx), %xmm0 - movlpd %xmm0, -1(%edx) - mov $6, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit11Case2OrCase3): - movlpd -2(%ecx), %xmm0 - movlpd %xmm0, -2(%edx) - mov $5, %esi - test %eax, %eax - jnz 
L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit12Case2OrCase3): - movl (%ecx), %esi - movl %esi, (%edx) - mov $4, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit13Case2OrCase3): - movl -1(%ecx), %esi - movl %esi, -1(%edx) - mov $3, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit14Case2OrCase3): - movl -2(%ecx), %esi - movl %esi, -2(%edx) - mov $2, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - - .p2align 4 -L(StrncpyExit15Case2OrCase3): - movl -3(%ecx), %esi - movl %esi, -3(%edx) - mov $1, %esi - test %eax, %eax - jnz L(CopyFrom1To16BytesCase2) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave1): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit1) - palignr $1, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 31(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit1) - palignr $1, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit1) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit1) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit1): - lea 15(%edx, %esi), %edx - lea 15(%ecx, %esi), %ecx - movdqu -16(%ecx), %xmm0 - xor %esi, %esi - movdqu %xmm0, -16(%edx) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave2): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit2) - palignr $2, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 30(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit2) - palignr $2, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit2) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit2) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit2): - lea 14(%edx, %esi), %edx - lea 
14(%ecx, %esi), %ecx - movdqu -16(%ecx), %xmm0 - xor %esi, %esi - movdqu %xmm0, -16(%edx) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave3): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit3) - palignr $3, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 29(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit3) - palignr $3, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit3) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit3) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit3): - lea 13(%edx, %esi), %edx - lea 13(%ecx, %esi), %ecx - movdqu -16(%ecx), %xmm0 - xor %esi, %esi - movdqu %xmm0, -16(%edx) - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave4): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit4) - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit4) - palignr $4, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit4) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit4) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit4): - lea 12(%edx, %esi), %edx - lea 12(%ecx, %esi), %ecx - movlpd -12(%ecx), %xmm0 - movl -4(%ecx), %eax - movlpd %xmm0, -12(%edx) - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave5): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit5) - palignr $5, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 27(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit5) - palignr $5, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit5) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit5) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit5): - lea 11(%edx, %esi), %edx - lea 
11(%ecx, %esi), %ecx - movlpd -11(%ecx), %xmm0 - movl -4(%ecx), %eax - movlpd %xmm0, -11(%edx) - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave6): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit6) - palignr $6, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 26(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit6) - palignr $6, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit6) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit6) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit6): - lea 10(%edx, %esi), %edx - lea 10(%ecx, %esi), %ecx - - movlpd -10(%ecx), %xmm0 - movw -2(%ecx), %ax - movlpd %xmm0, -10(%edx) - movw %ax, -2(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave7): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit7) - palignr $7, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 25(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit7) - palignr $7, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit7) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit7) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit7): - lea 9(%edx, %esi), %edx - lea 9(%ecx, %esi), %ecx - - movlpd -9(%ecx), %xmm0 - movb -1(%ecx), %ah - movlpd %xmm0, -9(%edx) - movb %ah, -1(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave8): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit8) - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit8) - palignr $8, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit8) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit8) - movaps %xmm5, 48(%edx) - lea 
16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit8): - lea 8(%edx, %esi), %edx - lea 8(%ecx, %esi), %ecx - movlpd -8(%ecx), %xmm0 - movlpd %xmm0, -8(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave9): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit9) - palignr $9, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 23(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit9) - palignr $9, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit9) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit9) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit9): - lea 7(%edx, %esi), %edx - lea 7(%ecx, %esi), %ecx - - movlpd -8(%ecx), %xmm0 - movlpd %xmm0, -8(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave10): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit10) - palignr $10, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 22(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit10) - palignr $10, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit10) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit10) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit10): - lea 6(%edx, %esi), %edx - lea 6(%ecx, %esi), %ecx - - movlpd -8(%ecx), %xmm0 - movlpd %xmm0, -8(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave11): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit11) - palignr $11, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 21(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit11) - palignr $11, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit11) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit11) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx 
-L(StrncpyExit11): - lea 5(%edx, %esi), %edx - lea 5(%ecx, %esi), %ecx - movl -5(%ecx), %esi - movb -1(%ecx), %ah - movl %esi, -5(%edx) - movb %ah, -1(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave12): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit12) - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit12) - palignr $12, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit12) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit12) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit12): - lea 4(%edx, %esi), %edx - lea 4(%ecx, %esi), %ecx - movl -4(%ecx), %eax - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave13): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit13) - palignr $13, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 19(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit13) - palignr $13, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit13) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit13) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit13): - lea 3(%edx, %esi), %edx - lea 3(%ecx, %esi), %ecx - - movl -4(%ecx), %eax - movl %eax, -4(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave14): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit14) - palignr $14, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 18(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit14) - palignr $14, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit14) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit14) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx 
-L(StrncpyExit14): - lea 2(%edx, %esi), %edx - lea 2(%ecx, %esi), %ecx - movw -2(%ecx), %ax - movw %ax, -2(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) - -L(StrncpyLeave15): - movaps %xmm2, %xmm3 - add $48, %ebx - jle L(StrncpyExit15) - palignr $15, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 17(%ecx), %xmm2 - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit15) - palignr $15, %xmm3, %xmm2 - movaps %xmm2, 16(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit15) - movaps %xmm4, 32(%edx) - lea 16(%esi), %esi - sub $16, %ebx - jbe L(StrncpyExit15) - movaps %xmm5, 48(%edx) - lea 16(%esi), %esi - lea -16(%ebx), %ebx -L(StrncpyExit15): - lea 1(%edx, %esi), %edx - lea 1(%ecx, %esi), %ecx - movb -1(%ecx), %ah - movb %ah, -1(%edx) - xor %esi, %esi - jmp L(CopyFrom1To16BytesCase3) -# endif - -# ifndef USE_AS_STRCAT -# ifdef USE_AS_STRNCPY - CFI_POP (%esi) - CFI_POP (%edi) - - .p2align 4 -L(ExitTail0): - movl %edx, %eax - RETURN - - .p2align 4 -L(StrncpyExit15Bytes): - cmp $12, %ebx - jbe L(StrncpyExit12Bytes) - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - cmpb $0, 11(%ecx) - jz L(ExitTail12) - cmp $13, %ebx - je L(ExitTail13) - cmpb $0, 12(%ecx) - jz L(ExitTail13) - cmp $14, %ebx - je L(ExitTail14) - cmpb $0, 13(%ecx) - jz L(ExitTail14) - movlpd (%ecx), %xmm0 - movlpd 7(%ecx), %xmm1 - movlpd %xmm0, (%edx) - movlpd %xmm1, 7(%edx) -# ifdef USE_AS_STPCPY - lea 14(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax -# else - movl %edx, %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit12Bytes): - cmp $9, %ebx - je L(ExitTail9) - cmpb $0, 8(%ecx) - jz L(ExitTail9) - cmp $10, %ebx - je L(ExitTail10) - cmpb $0, 9(%ecx) - jz L(ExitTail10) - cmp $11, %ebx - je L(ExitTail11) - cmpb $0, 10(%ecx) - jz L(ExitTail11) - movlpd (%ecx), %xmm0 - movl 8(%ecx), %eax - movlpd %xmm0, (%edx) - movl %eax, 8(%edx) - SAVE_RESULT_TAIL (11) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif - 
RETURN - - .p2align 4 -L(StrncpyExit8Bytes): - cmp $4, %ebx - jbe L(StrncpyExit4Bytes) - cmpb $0, (%ecx) - jz L(ExitTail1) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - cmpb $0, 3(%ecx) - jz L(ExitTail4) - - cmp $5, %ebx - je L(ExitTail5) - cmpb $0, 4(%ecx) - jz L(ExitTail5) - cmp $6, %ebx - je L(ExitTail6) - cmpb $0, 5(%ecx) - jz L(ExitTail6) - cmp $7, %ebx - je L(ExitTail7) - cmpb $0, 6(%ecx) - jz L(ExitTail7) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) -# ifdef USE_AS_STPCPY - lea 7(%edx), %eax - cmpb $1, (%eax) - sbb $-1, %eax -# else - movl %edx, %eax -# endif - RETURN - - .p2align 4 -L(StrncpyExit4Bytes): - test %ebx, %ebx - jz L(ExitTail0) - cmp $1, %ebx - je L(ExitTail1) - cmpb $0, (%ecx) - jz L(ExitTail1) - cmp $2, %ebx - je L(ExitTail2) - cmpb $0, 1(%ecx) - jz L(ExitTail2) - cmp $3, %ebx - je L(ExitTail3) - cmpb $0, 2(%ecx) - jz L(ExitTail3) - movl (%ecx), %eax - movl %eax, (%edx) - SAVE_RESULT_TAIL (3) -# ifdef USE_AS_STPCPY - cmpb $1, (%eax) - sbb $-1, %eax -# endif - RETURN -# endif - -END (STRCPY) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strcpy.S b/sysdeps/i386/i686/multiarch/strcpy.S deleted file mode 100644 index ffbc03c6d5..0000000000 --- a/sysdeps/i386/i686/multiarch/strcpy.S +++ /dev/null @@ -1,116 +0,0 @@ -/* Multiple versions of strcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY) -# ifndef STRCPY -# define STRCPY strcpy -# endif -#endif - -#ifdef USE_AS_STPCPY -# ifdef USE_AS_STRNCPY -# define STRCPY_SSSE3 __stpncpy_ssse3 -# define STRCPY_SSE2 __stpncpy_sse2 -# define STRCPY_IA32 __stpncpy_ia32 -# define __GI_STRCPY __GI_stpncpy -# define __GI___STRCPY __GI___stpncpy -# else -# define STRCPY_SSSE3 __stpcpy_ssse3 -# define STRCPY_SSE2 __stpcpy_sse2 -# define STRCPY_IA32 __stpcpy_ia32 -# define __GI_STRCPY __GI_stpcpy -# define __GI___STRCPY __GI___stpcpy -# endif -#else -# ifdef USE_AS_STRNCPY -# define STRCPY_SSSE3 __strncpy_ssse3 -# define STRCPY_SSE2 __strncpy_sse2 -# define STRCPY_IA32 __strncpy_ia32 -# define __GI_STRCPY __GI_strncpy -# else -# define STRCPY_SSSE3 __strcpy_ssse3 -# define STRCPY_SSE2 __strcpy_sse2 -# define STRCPY_IA32 __strcpy_ia32 -# define __GI_STRCPY __GI_strcpy -# endif -#endif - - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strncpy in static library since we - need strncpy before the initialization happened. 
*/ -#if IS_IN (libc) - - .text -ENTRY(STRCPY) - .type STRCPY, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (STRCPY_IA32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (STRCPY_SSE2) - HAS_ARCH_FEATURE (Fast_Unaligned_Load) - jnz 2f - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (STRCPY_SSSE3) -2: ret -END(STRCPY) - -# undef ENTRY -# define ENTRY(name) \ - .type STRCPY_IA32, @function; \ - .align 16; \ - .globl STRCPY_IA32; \ - .hidden STRCPY_IA32; \ - STRCPY_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size STRCPY_IA32, .-STRCPY_IA32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal strcpy calls through a PLT. - The speedup we get from using SSSE3 instruction is likely eaten away - by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCPY; __GI_STRCPY = STRCPY_IA32 -# undef libc_hidden_def -# define libc_hidden_def(name) \ - .globl __GI___STRCPY; __GI___STRCPY = STRCPY_IA32 - -# endif -#endif - -#ifdef USE_AS_STPCPY -# ifdef USE_AS_STRNCPY -# include "../../stpncpy.S" -# else -# include "../../i586/stpcpy.S" -# endif -#else -# ifndef USE_AS_STRNCPY -# include "../../i586/strcpy.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strcspn-c.c b/sysdeps/i386/i686/multiarch/strcspn-c.c deleted file mode 100644 index 6d61e190a8..0000000000 --- a/sysdeps/i386/i686/multiarch/strcspn-c.c +++ /dev/null @@ -1,2 +0,0 @@ -#define __strcspn_sse2 __strcspn_ia32 -#include <sysdeps/x86_64/multiarch/strcspn-c.c> diff --git a/sysdeps/i386/i686/multiarch/strcspn.S b/sysdeps/i386/i686/multiarch/strcspn.S deleted file mode 100644 index 21e5093924..0000000000 --- a/sysdeps/i386/i686/multiarch/strcspn.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Multiple versions of strcspn - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> -#include <sysdep.h> -#include <init-arch.h> - -#ifdef USE_AS_STRPBRK -#define STRCSPN_SSE42 __strpbrk_sse42 -#define STRCSPN_IA32 __strpbrk_ia32 -#define __GI_STRCSPN __GI_strpbrk -#else -#ifndef STRCSPN -#define STRCSPN strcspn -#define STRCSPN_SSE42 __strcspn_sse42 -#define STRCSPN_IA32 __strcspn_ia32 -#define __GI_STRCSPN __GI_strcspn -#endif -#endif - -/* Define multiple versions only for the definition in libc. Don't - define multiple versions for strpbrk in static library since we - need strpbrk before the initialization happened. 
*/ -#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc) - .text -ENTRY(STRCSPN) - .type STRCSPN, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (STRCSPN_IA32) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - LOAD_FUNC_GOT_EAX (STRCSPN_SSE42) -2: ret -END(STRCSPN) - -# undef ENTRY -# define ENTRY(name) \ - .type STRCSPN_IA32, @function; \ - .globl STRCSPN_IA32; \ - .p2align 4; \ - STRCSPN_IA32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size STRCSPN_IA32, .-STRCSPN_IA32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_STRCSPN; __GI_STRCSPN = STRCSPN_IA32 -#endif - -#ifdef USE_AS_STRPBRK -#include "../../strpbrk.S" -#else -#include "../../strcspn.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S deleted file mode 100644 index d3ea864bab..0000000000 --- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S +++ /dev/null @@ -1,125 +0,0 @@ -/* strlen with SSE2 and BSF - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined SHARED && IS_IN (libc) - -#include <sysdep.h> - -#define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -#define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -#define PUSH(REG) pushl REG; CFI_PUSH (REG) -#define POP(REG) popl REG; CFI_POP (REG) -#define PARMS 4 + 8 /* Preserve ESI and EDI. */ -#define STR PARMS -#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state -#define RETURN POP (%edi); POP (%esi); ret; \ - cfi_restore_state; cfi_remember_state - - .text -ENTRY ( __strlen_sse2_bsf) - ENTRANCE - mov STR(%esp), %edi - xor %eax, %eax - mov %edi, %ecx - and $0x3f, %ecx - pxor %xmm0, %xmm0 - cmp $0x30, %ecx - ja L(next) - movdqu (%edi), %xmm1 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit_less16) - mov %edi, %eax - and $-16, %eax - jmp L(align16_start) -L(next): - - mov %edi, %eax - and $-16, %eax - pcmpeqb (%eax), %xmm0 - mov $-1, %esi - sub %eax, %ecx - shl %cl, %esi - pmovmskb %xmm0, %edx - and %esi, %edx - jnz L(exit) -L(align16_start): - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - .p2align 4 -L(align16_loop): - pcmpeqb 16(%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%eax), %xmm3 - pmovmskb %xmm3, %edx - lea 64(%eax), %eax - test %edx, %edx - jz L(align16_loop) -L(exit): - sub %edi, %eax -L(exit_less16): - bsf %edx, %edx - add %edx, %eax - RETURN -L(exit16): - sub %edi, %eax - bsf %edx, %edx - add %edx, %eax - add $16, %eax - RETURN -L(exit32): - sub %edi, %eax - bsf %edx, %edx - add %edx, %eax - add $32, %eax - RETURN -L(exit48): - sub %edi, %eax - bsf %edx, 
%edx - add %edx, %eax - add $48, %eax - POP (%edi) - POP (%esi) - ret - -END ( __strlen_sse2_bsf) - -#endif diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S deleted file mode 100644 index 36fc1469d0..0000000000 --- a/sysdeps/i386/i686/multiarch/strlen-sse2.S +++ /dev/null @@ -1,695 +0,0 @@ -/* strlen with SSE2 - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -/* for strlen only SHARED version is optimized, for strcat, strncat, strnlen both STATIC and SHARED are optimized */ - -#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && IS_IN (libc) - -# ifndef USE_AS_STRCAT - -# include <sysdep.h> -# define PARMS 4 -# define STR PARMS -# define RETURN ret - -# ifdef USE_AS_STRNLEN -# define LEN PARMS + 8 -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) -# undef RETURN -# define RETURN POP (%edi); CFI_PUSH(%edi); ret -# endif - -# ifndef STRLEN -# define STRLEN __strlen_sse2 -# endif - - atom_text_section -ENTRY (STRLEN) - mov STR(%esp), %edx -# ifdef USE_AS_STRNLEN - PUSH (%edi) - movl LEN(%esp), %edi - sub $4, %edi - jbe L(len_less4_prolog) -# endif -# endif - xor %eax, %eax - cmpb $0, (%edx) - jz L(exit_tail0) - cmpb $0, 1(%edx) - jz L(exit_tail1) - cmpb $0, 2(%edx) - jz L(exit_tail2) - cmpb $0, 3(%edx) - jz L(exit_tail3) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less8_prolog) -# endif - - cmpb $0, 4(%edx) - jz L(exit_tail4) - cmpb $0, 5(%edx) - jz L(exit_tail5) - cmpb $0, 6(%edx) - jz L(exit_tail6) - cmpb $0, 7(%edx) - jz L(exit_tail7) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less12_prolog) -# endif - - cmpb $0, 8(%edx) - jz L(exit_tail8) - cmpb $0, 9(%edx) - jz L(exit_tail9) - cmpb $0, 10(%edx) - jz L(exit_tail10) - cmpb $0, 11(%edx) - jz L(exit_tail11) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less16_prolog) -# endif - - cmpb $0, 12(%edx) - jz L(exit_tail12) - cmpb $0, 13(%edx) - jz L(exit_tail13) - cmpb $0, 14(%edx) - jz L(exit_tail14) - cmpb $0, 15(%edx) - jz L(exit_tail15) - - pxor %xmm0, %xmm0 - lea 16(%edx), %eax - mov %eax, %ecx - and $-16, %eax - -# ifdef USE_AS_STRNLEN - and $15, %edx - add %edx, %edi - sub $64, %edi - jbe L(len_less64) -# endif - - 
pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - pxor %xmm2, %xmm2 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - pxor %xmm3, %xmm3 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - mov %eax, %edx - and $63, %edx - add %edx, %edi -# endif - - and 
$-0x40, %eax - - .p2align 4 -L(aligned_64_loop): -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - movaps (%eax), %xmm0 - movaps 16(%eax), %xmm1 - movaps 32(%eax), %xmm2 - movaps 48(%eax), %xmm6 - pminub %xmm1, %xmm0 - pminub %xmm6, %xmm2 - pminub %xmm0, %xmm2 - pcmpeqb %xmm3, %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 64(%eax), %eax - jz L(aligned_64_loop) - - pcmpeqb -64(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 48(%ecx), %ecx - jnz L(exit) - - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqb -32(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqb %xmm6, %xmm3 - pmovmskb %xmm3, %edx - lea -16(%ecx), %ecx -L(exit): - sub %ecx, %eax - test %dl, %dl - jz L(exit_high) - - mov %dl, %cl - and $15, %cl - jz L(exit_8) - test $0x01, %dl - jnz L(exit_tail0) - test $0x02, %dl - jnz L(exit_tail1) - test $0x04, %dl - jnz L(exit_tail2) - add $3, %eax - RETURN - - .p2align 4 -L(exit_8): - test $0x10, %dl - jnz L(exit_tail4) - test $0x20, %dl - jnz L(exit_tail5) - test $0x40, %dl - jnz L(exit_tail6) - add $7, %eax - RETURN - - .p2align 4 -L(exit_high): - mov %dh, %ch - and $15, %ch - jz L(exit_high_8) - test $0x01, %dh - jnz L(exit_tail8) - test $0x02, %dh - jnz L(exit_tail9) - test $0x04, %dh - jnz L(exit_tail10) - add $11, %eax - RETURN - - .p2align 4 -L(exit_high_8): - test $0x10, %dh - jnz L(exit_tail12) - test $0x20, %dh - jnz L(exit_tail13) - test $0x40, %dh - jnz L(exit_tail14) - add $15, %eax -L(exit_tail0): - RETURN - -# ifdef USE_AS_STRNLEN - - .p2align 4 -L(len_less64): - pxor %xmm0, %xmm0 - add $64, %edi - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe L(return_start_len) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe 
L(return_start_len) - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe L(return_start_len) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - movl LEN(%esp), %eax - RETURN - - .p2align 4 -L(strnlen_exit): - sub %ecx, %eax - - test %dl, %dl - jz L(strnlen_exit_high) - mov %dl, %cl - and $15, %cl - jz L(strnlen_exit_8) - test $0x01, %dl - jnz L(exit_tail0) - test $0x02, %dl - jnz L(strnlen_exit_tail1) - test $0x04, %dl - jnz L(strnlen_exit_tail2) - sub $4, %edi - jb L(return_start_len) - lea 3(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_8): - test $0x10, %dl - jnz L(strnlen_exit_tail4) - test $0x20, %dl - jnz L(strnlen_exit_tail5) - test $0x40, %dl - jnz L(strnlen_exit_tail6) - sub $8, %edi - jb L(return_start_len) - lea 7(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_high): - mov %dh, %ch - and $15, %ch - jz L(strnlen_exit_high_8) - test $0x01, %dh - jnz L(strnlen_exit_tail8) - test $0x02, %dh - jnz L(strnlen_exit_tail9) - test $0x04, %dh - jnz L(strnlen_exit_tail10) - sub $12, %edi - jb L(return_start_len) - lea 11(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_high_8): - test $0x10, %dh - jnz L(strnlen_exit_tail12) - test $0x20, %dh - jnz L(strnlen_exit_tail13) - test $0x40, %dh - jnz L(strnlen_exit_tail14) - sub $16, %edi - jb L(return_start_len) - lea 15(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail1): - sub $2, %edi - jb L(return_start_len) - lea 1(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail2): - sub $3, %edi - jb L(return_start_len) - lea 2(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail4): - sub $5, %edi - jb L(return_start_len) - lea 4(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail5): - sub $6, %edi - jb L(return_start_len) - lea 5(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail6): - sub $7, %edi - jb L(return_start_len) - lea 6(%eax), %eax - 
RETURN - - .p2align 4 -L(strnlen_exit_tail8): - sub $9, %edi - jb L(return_start_len) - lea 8(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail9): - sub $10, %edi - jb L(return_start_len) - lea 9(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail10): - sub $11, %edi - jb L(return_start_len) - lea 10(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail12): - sub $13, %edi - jb L(return_start_len) - lea 12(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail13): - sub $14, %edi - jb L(return_start_len) - lea 13(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail14): - sub $15, %edi - jb L(return_start_len) - lea 14(%eax), %eax - RETURN - - .p2align 4 -L(return_start_len): - movl LEN(%esp), %eax - RETURN - -/* for prolog only */ - - .p2align 4 -L(len_less4_prolog): - xor %eax, %eax - - add $4, %edi - jz L(exit_tail0) - - cmpb $0, (%edx) - jz L(exit_tail0) - cmp $1, %edi - je L(exit_tail1) - - cmpb $0, 1(%edx) - jz L(exit_tail1) - cmp $2, %edi - je L(exit_tail2) - - cmpb $0, 2(%edx) - jz L(exit_tail2) - cmp $3, %edi - je L(exit_tail3) - - cmpb $0, 3(%edx) - jz L(exit_tail3) - mov $4, %eax - RETURN - - .p2align 4 -L(len_less8_prolog): - add $4, %edi - - cmpb $0, 4(%edx) - jz L(exit_tail4) - cmp $1, %edi - je L(exit_tail5) - - cmpb $0, 5(%edx) - jz L(exit_tail5) - cmp $2, %edi - je L(exit_tail6) - - cmpb $0, 6(%edx) - jz L(exit_tail6) - cmp $3, %edi - je L(exit_tail7) - - cmpb $0, 7(%edx) - jz L(exit_tail7) - mov $8, %eax - RETURN - - - .p2align 4 -L(len_less12_prolog): - add $4, %edi - - cmpb $0, 8(%edx) - jz L(exit_tail8) - cmp $1, %edi - je L(exit_tail9) - - cmpb $0, 9(%edx) - jz L(exit_tail9) - cmp $2, %edi - je L(exit_tail10) - - cmpb $0, 10(%edx) - jz L(exit_tail10) - cmp $3, %edi - je L(exit_tail11) - - cmpb $0, 11(%edx) - jz L(exit_tail11) - mov $12, %eax - RETURN - - .p2align 4 -L(len_less16_prolog): - add $4, %edi - - cmpb $0, 12(%edx) - jz L(exit_tail12) - cmp $1, %edi - je L(exit_tail13) - - cmpb $0, 13(%edx) - jz L(exit_tail13) - 
cmp $2, %edi - je L(exit_tail14) - - cmpb $0, 14(%edx) - jz L(exit_tail14) - cmp $3, %edi - je L(exit_tail15) - - cmpb $0, 15(%edx) - jz L(exit_tail15) - mov $16, %eax - RETURN -# endif - - .p2align 4 -L(exit_tail1): - add $1, %eax - RETURN - -L(exit_tail2): - add $2, %eax - RETURN - -L(exit_tail3): - add $3, %eax - RETURN - -L(exit_tail4): - add $4, %eax - RETURN - -L(exit_tail5): - add $5, %eax - RETURN - -L(exit_tail6): - add $6, %eax - RETURN - -L(exit_tail7): - add $7, %eax - RETURN - -L(exit_tail8): - add $8, %eax - RETURN - -L(exit_tail9): - add $9, %eax - RETURN - -L(exit_tail10): - add $10, %eax - RETURN - -L(exit_tail11): - add $11, %eax - RETURN - -L(exit_tail12): - add $12, %eax - RETURN - -L(exit_tail13): - add $13, %eax - RETURN - -L(exit_tail14): - add $14, %eax - RETURN - -L(exit_tail15): - add $15, %eax -# ifndef USE_AS_STRCAT - RETURN -END (STRLEN) -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S deleted file mode 100644 index 77cf6bcdb0..0000000000 --- a/sysdeps/i386/i686/multiarch/strlen.S +++ /dev/null @@ -1,60 +0,0 @@ -/* Multiple versions of strlen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc and for the - DSO. In static binaries, we need strlen before the initialization - happened. */ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(strlen) - .type strlen, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strlen_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strlen_sse2_bsf) - HAS_ARCH_FEATURE (Slow_BSF) - jz 2f - LOAD_FUNC_GOT_EAX (__strlen_sse2) -2: ret -END(strlen) - -# undef ENTRY -# define ENTRY(name) \ - .type __strlen_ia32, @function; \ - .globl __strlen_ia32; \ - .p2align 4; \ - __strlen_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strlen_ia32, .-__strlen_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strlen; __GI_strlen = __strlen_ia32 -#endif - -#include "../../i586/strlen.S" diff --git a/sysdeps/i386/i686/multiarch/strncase-c.c b/sysdeps/i386/i686/multiarch/strncase-c.c deleted file mode 100644 index 76581eb62b..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <string.h> - -extern __typeof (strncasecmp) __strncasecmp_nonascii; - -#define __strncasecmp __strncasecmp_nonascii -#include <string/strncase.c> - -strong_alias (__strncasecmp_nonascii, __strncasecmp_ia32) diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S deleted file mode 100644 index a56e63a566..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Entry point for multi-version x86 strncasecmp. 
- All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY(__strncasecmp) - .type __strncasecmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strncasecmp_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__strncasecmp_ssse3) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - HAS_ARCH_FEATURE (Slow_SSE4_2) - jnz 2f - LOAD_FUNC_GOT_EAX (__strncasecmp_sse4_2) -2: ret -END(__strncasecmp) - -weak_alias (__strncasecmp, strncasecmp) diff --git a/sysdeps/i386/i686/multiarch/strncase_l-c.c b/sysdeps/i386/i686/multiarch/strncase_l-c.c deleted file mode 100644 index 7e601af271..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-c.c +++ /dev/null @@ -1,13 +0,0 @@ -#include <string.h> - -extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii; - -#define __strncasecmp_l __strncasecmp_l_nonascii -#define USE_IN_EXTENDED_LOCALE_MODEL 1 -#include <string/strncase.c> - -strong_alias (__strncasecmp_l_nonascii, __strncasecmp_l_ia32) - -/* The needs of strcasecmp in libc are minimal, no need to go through - the IFUNC. 
*/ -strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/sysdeps/i386/i686/multiarch/strncase_l-sse4.S deleted file mode 100644 index 557210832e..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRNCASECMP_L 1 -#include "strcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S b/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S deleted file mode 100644 index d438a1ae35..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S +++ /dev/null @@ -1,2 +0,0 @@ -#define USE_AS_STRNCASECMP_L 1 -#include "strcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strncase_l.S b/sysdeps/i386/i686/multiarch/strncase_l.S deleted file mode 100644 index 8a74ee8574..0000000000 --- a/sysdeps/i386/i686/multiarch/strncase_l.S +++ /dev/null @@ -1,7 +0,0 @@ -/* Multiple versions of strncasecmp_l - All versions must be listed in ifunc-impl-list.c. */ -#define STRCMP __strncasecmp_l -#define USE_AS_STRNCASECMP_L -#include "strcmp.S" - -weak_alias (__strncasecmp_l, strncasecmp_l) diff --git a/sysdeps/i386/i686/multiarch/strncat-c.c b/sysdeps/i386/i686/multiarch/strncat-c.c deleted file mode 100644 index 132a000545..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#define STRNCAT __strncat_ia32 -#ifdef SHARED -#undef libc_hidden_def -#define libc_hidden_def(name) \ - __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32); -#endif - -#include "string/strncat.c" diff --git a/sysdeps/i386/i686/multiarch/strncat-sse2.S b/sysdeps/i386/i686/multiarch/strncat-sse2.S deleted file mode 100644 index f1045b72b8..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat-sse2.S +++ /dev/null @@ -1,4 +0,0 @@ -#define STRCAT __strncat_sse2 -#define USE_AS_STRNCAT - -#include "strcat-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/sysdeps/i386/i686/multiarch/strncat-ssse3.S deleted file mode 
100644 index 625b90a978..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define STRCAT __strncat_ssse3 -#define USE_AS_STRNCAT - -#include "strcat-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strncat.S b/sysdeps/i386/i686/multiarch/strncat.S deleted file mode 100644 index 5c1bf41453..0000000000 --- a/sysdeps/i386/i686/multiarch/strncat.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strncat - All versions must be listed in ifunc-impl-list.c. */ -#define STRCAT strncat -#define USE_AS_STRNCAT -#include "strcat.S" diff --git a/sysdeps/i386/i686/multiarch/strncmp-c.c b/sysdeps/i386/i686/multiarch/strncmp-c.c deleted file mode 100644 index cc059da494..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#ifdef SHARED -# define STRNCMP __strncmp_ia32 -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32); -#endif - -#include "string/strncmp.c" diff --git a/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/sysdeps/i386/i686/multiarch/strncmp-sse4.S deleted file mode 100644 index cf14dfaf6c..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp-sse4.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef SHARED -# define USE_AS_STRNCMP -# define STRCMP __strncmp_sse4_2 -# include "strcmp-sse4.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/sysdeps/i386/i686/multiarch/strncmp-ssse3.S deleted file mode 100644 index 536c8685f2..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef SHARED -# define USE_AS_STRNCMP -# define STRCMP __strncmp_ssse3 -# include "strcmp-ssse3.S" -#endif diff --git a/sysdeps/i386/i686/multiarch/strncmp.S b/sysdeps/i386/i686/multiarch/strncmp.S deleted file mode 100644 index 150d4786d2..0000000000 --- a/sysdeps/i386/i686/multiarch/strncmp.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strncmp - All versions must be listed 
in ifunc-impl-list.c. */ -#define USE_AS_STRNCMP -#define STRCMP strncmp -#include "strcmp.S" diff --git a/sysdeps/i386/i686/multiarch/strncpy-c.c b/sysdeps/i386/i686/multiarch/strncpy-c.c deleted file mode 100644 index 201e3f98b3..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy-c.c +++ /dev/null @@ -1,8 +0,0 @@ -#define STRNCPY __strncpy_ia32 -#ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strncpy_ia32, __GI_strncpy, __strncpy_ia32); -#endif - -#include "string/strncpy.c" diff --git a/sysdeps/i386/i686/multiarch/strncpy-sse2.S b/sysdeps/i386/i686/multiarch/strncpy-sse2.S deleted file mode 100644 index bdd99239a4..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STRNCPY -#define STRCPY __strncpy_sse2 -#include "strcpy-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S b/sysdeps/i386/i686/multiarch/strncpy-ssse3.S deleted file mode 100644 index bf82ee447d..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STRNCPY -#define STRCPY __strncpy_ssse3 -#include "strcpy-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/strncpy.S b/sysdeps/i386/i686/multiarch/strncpy.S deleted file mode 100644 index 9c257efc6e..0000000000 --- a/sysdeps/i386/i686/multiarch/strncpy.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strncpy - All versions must be listed in ifunc-impl-list.c. 
*/ -#define USE_AS_STRNCPY -#define STRCPY strncpy -#include "strcpy.S" diff --git a/sysdeps/i386/i686/multiarch/strnlen-c.c b/sysdeps/i386/i686/multiarch/strnlen-c.c deleted file mode 100644 index 351e939a93..0000000000 --- a/sysdeps/i386/i686/multiarch/strnlen-c.c +++ /dev/null @@ -1,10 +0,0 @@ -#define STRNLEN __strnlen_ia32 -#ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32); \ - strong_alias (__strnlen_ia32, __strnlen_ia32_1); \ - __hidden_ver1 (__strnlen_ia32_1, __GI___strnlen, __strnlen_ia32_1); -#endif - -#include "string/strnlen.c" diff --git a/sysdeps/i386/i686/multiarch/strnlen-sse2.S b/sysdeps/i386/i686/multiarch/strnlen-sse2.S deleted file mode 100644 index 56b6ae2a5c..0000000000 --- a/sysdeps/i386/i686/multiarch/strnlen-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STRNLEN -#define STRLEN __strnlen_sse2 -#include "strlen-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S deleted file mode 100644 index d241522c70..0000000000 --- a/sysdeps/i386/i686/multiarch/strnlen.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of strnlen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__strnlen) - .type __strnlen, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strnlen_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strnlen_sse2) -2: ret -END(__strnlen) - -weak_alias(__strnlen, strnlen) -#endif diff --git a/sysdeps/i386/i686/multiarch/strpbrk-c.c b/sysdeps/i386/i686/multiarch/strpbrk-c.c deleted file mode 100644 index 5db62053b3..0000000000 --- a/sysdeps/i386/i686/multiarch/strpbrk-c.c +++ /dev/null @@ -1,2 +0,0 @@ -#define __strpbrk_sse2 __strpbrk_ia32 -#include <sysdeps/x86_64/multiarch/strpbrk-c.c> diff --git a/sysdeps/i386/i686/multiarch/strpbrk.S b/sysdeps/i386/i686/multiarch/strpbrk.S deleted file mode 100644 index 7201d6376f..0000000000 --- a/sysdeps/i386/i686/multiarch/strpbrk.S +++ /dev/null @@ -1,5 +0,0 @@ -/* Multiple versions of strpbrk - All versions must be listed in ifunc-impl-list.c. */ -#define STRCSPN strpbrk -#define USE_AS_STRPBRK -#include "strcspn.S" diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S deleted file mode 100644 index 39a7c8825b..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S +++ /dev/null @@ -1,282 +0,0 @@ -/* strrchr with SSE2 with bsf and bsr - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - - .text -ENTRY (__strrchr_sse2_bsf) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - PUSH (%edi) - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - pshufd $0, %xmm1, %xmm1 - ja L(crosscashe) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %edx - /* Check if there is a match. 
*/ - pmovmskb %xmm0, %eax - - test %eax, %eax - jnz L(unaligned_match1) - - test %edx, %edx - jnz L(return_null) - - and $-16, %edi - add $16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_return_value1): - bsf %edx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_null) - bsr %eax, %eax - add %edi, %eax - POP (%edi) - ret - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_match1): - test %edx, %edx - jnz L(unaligned_return_value1) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - lea 16(%edi), %esi - and $-16, %edi - add $16, %edi - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 - L(crosscashe): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - add $16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_return_value): - add %ecx, %edi - bsf %edx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_null) - bsr %eax, %eax - add %edi, %eax - POP (%edi) - ret - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(unaligned_return_value) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - add $16, %edi - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. 
*/ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jz L(loop) - -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %ebx, %ebx - jz L(return_null_1) - bsr %ebx, %eax - add %esi, %eax - - POP (%ebx) - POP (%esi) - - sub $16, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(return_value_1) - mov %eax, %ebx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(return_value_1): - bsf %ecx, %ecx - mov $2, %edx - shl %cl, %edx - sub $1, %edx - and %edx, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - - bsr %eax, %eax - add %edi, %eax - sub $16, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) -/* Return NULL. */ - .p2align 4 -L(return_null): - xor %eax, %eax - POP (%edi) - ret - - CFI_PUSH (%edi) - CFI_PUSH (%ebx) - CFI_PUSH (%esi) -/* Return NULL. */ - .p2align 4 -L(return_null_1): - POP (%ebx) - POP (%esi) - POP (%edi) - xor %eax, %eax - ret - -END (__strrchr_sse2_bsf) -#endif diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/sysdeps/i386/i686/multiarch/strrchr-sse2.S deleted file mode 100644 index 20934288be..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2.S +++ /dev/null @@ -1,708 +0,0 @@ -/* strrchr SSE2 without bsf and bsr - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. 
- This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH(%edi); -# define RETURN POP(%edi); ret; CFI_PUSH(%edi); - -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__strrchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - pxor %xmm2, %xmm2 - mov %ecx, %edi - punpcklbw %xmm1, %xmm1 - punpcklbw %xmm1, %xmm1 - /* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - pshufd $0, %xmm1, %xmm1 - ja L(crosscache) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm2, %ecx - /* Check if there is a match. 
*/ - pmovmskb %xmm0, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match1) - - test %ecx, %ecx - jnz L(return_null) - - and $-16, %edi - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match1): - test %ecx, %ecx - jnz L(prolog_find_zero_1) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - mov %edi, %esi - and $-16, %edi - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm3 - pcmpeqb %xmm1, %xmm0 - /* Find where NULL is. */ - pmovmskb %xmm3, %edx - /* Check if there is a match. */ - pmovmskb %xmm0, %eax - /* Remove the leading bytes. */ - shr %cl, %edx - shr %cl, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - PUSH (%esi) - PUSH (%ebx) - - xor %ebx, %ebx - jmp L(loop) - - CFI_POP (%esi) - CFI_POP (%ebx) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(prolog_find_zero) - - PUSH (%esi) - PUSH (%ebx) - - mov %eax, %ebx - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. 
*/ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm0 - pcmpeqb %xmm0, %xmm2 - add $16, %edi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jz L(loop) - -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %ebx, %ebx - jz L(return_null_1) - mov %ebx, %eax - mov %esi, %edi - - POP (%ebx) - POP (%esi) - - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(return_null_1): - POP (%ebx) - POP (%esi) - - xor %eax, %eax - RETURN - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(find_zero) - mov %eax, %ebx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(find_zero_8) - test $0x01, %cl - jnz L(FindZeroExit1) - test $0x02, %cl - jnz L(FindZeroExit2) - test $0x04, %cl - jnz L(FindZeroExit3) - and $1 << 4 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_8): - test $0x10, %cl - jnz L(FindZeroExit5) - test $0x20, %cl - jnz L(FindZeroExit6) - test $0x40, %cl - jnz L(FindZeroExit7) - and $1 << 8 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(find_zero_high_8) - test $0x01, %ch - jnz L(FindZeroExit9) - test $0x02, %ch - jnz 
L(FindZeroExit10) - test $0x04, %ch - jnz L(FindZeroExit11) - and $1 << 12 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_high_8): - test $0x10, %ch - jnz L(FindZeroExit13) - test $0x20, %ch - jnz L(FindZeroExit14) - test $0x40, %ch - jnz L(FindZeroExit15) - and $1 << 16 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit1): - and $1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit2): - and $1 << 2 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit3): - and $1 << 3 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit5): - and $1 << 5 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit6): - and $1 << 6 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit7): - and $1 << 7 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit9): - and $1 << 9 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit10): - and $1 << 10 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit11): - and $1 << 11 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 
-L(FindZeroExit13): - and $1 << 13 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit14): - and $1 << 14 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - jmp L(match_exit) - - CFI_PUSH (%ebx) - CFI_PUSH (%esi) - - .p2align 4 -L(FindZeroExit15): - and $1 << 15 - 1, %eax - jz L(return_value) - - POP (%ebx) - POP (%esi) - - .p2align 4 -L(match_exit): - test %ah, %ah - jnz L(match_exit_high) - mov %al, %dl - and $15 << 4, %dl - jnz L(match_exit_8) - test $0x08, %al - jnz L(Exit4) - test $0x04, %al - jnz L(Exit3) - test $0x02, %al - jnz L(Exit2) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_8): - test $0x80, %al - jnz L(Exit8) - test $0x40, %al - jnz L(Exit7) - test $0x20, %al - jnz L(Exit6) - lea -12(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high): - mov %ah, %dh - and $15 << 4, %dh - jnz L(match_exit_high_8) - test $0x08, %ah - jnz L(Exit12) - test $0x04, %ah - jnz L(Exit11) - test $0x02, %ah - jnz L(Exit10) - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_exit_high_8): - test $0x80, %ah - jnz L(Exit16) - test $0x40, %ah - jnz L(Exit15) - test $0x20, %ah - jnz L(Exit14) - lea -4(%edi), %eax - RETURN - - .p2align 4 -L(Exit2): - lea -15(%edi), %eax - RETURN - - .p2align 4 -L(Exit3): - lea -14(%edi), %eax - RETURN - - .p2align 4 -L(Exit4): - lea -13(%edi), %eax - RETURN - - .p2align 4 -L(Exit6): - lea -11(%edi), %eax - RETURN - - .p2align 4 -L(Exit7): - lea -10(%edi), %eax - RETURN - - .p2align 4 -L(Exit8): - lea -9(%edi), %eax - RETURN - - .p2align 4 -L(Exit10): - lea -7(%edi), %eax - RETURN - - .p2align 4 -L(Exit11): - lea -6(%edi), %eax - RETURN - - .p2align 4 -L(Exit12): - lea -5(%edi), %eax - RETURN - - .p2align 4 -L(Exit14): - lea -3(%edi), %eax - RETURN - - .p2align 4 -L(Exit15): - lea -2(%edi), %eax - RETURN - - .p2align 4 -L(Exit16): - lea -1(%edi), %eax - RETURN - -/* Return NULL. 
*/ - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero): - add %ecx, %edi - mov %edx, %ecx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_high) - mov %cl, %dl - and $15, %dl - jz L(prolog_find_zero_8) - test $0x01, %cl - jnz L(PrologFindZeroExit1) - test $0x02, %cl - jnz L(PrologFindZeroExit2) - test $0x04, %cl - jnz L(PrologFindZeroExit3) - and $1 << 4 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_8): - test $0x10, %cl - jnz L(PrologFindZeroExit5) - test $0x20, %cl - jnz L(PrologFindZeroExit6) - test $0x40, %cl - jnz L(PrologFindZeroExit7) - and $1 << 8 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high): - mov %ch, %dh - and $15, %dh - jz L(prolog_find_zero_high_8) - test $0x01, %ch - jnz L(PrologFindZeroExit9) - test $0x02, %ch - jnz L(PrologFindZeroExit10) - test $0x04, %ch - jnz L(PrologFindZeroExit11) - and $1 << 12 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero_high_8): - test $0x10, %ch - jnz L(PrologFindZeroExit13) - test $0x20, %ch - jnz L(PrologFindZeroExit14) - test $0x40, %ch - jnz L(PrologFindZeroExit15) - and $1 << 16 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit1): - and $1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit2): - and $1 << 2 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit3): - and $1 << 3 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit5): - and $1 << 5 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit6): - and $1 << 6 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit7): - and $1 << 7 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit9): - and $1 << 9 
- 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit10): - and $1 << 10 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit11): - and $1 << 11 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit13): - and $1 << 13 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit14): - and $1 << 14 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - - .p2align 4 -L(PrologFindZeroExit15): - and $1 << 15 - 1, %eax - jnz L(match_exit) - xor %eax, %eax - RETURN - -END (__strrchr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S deleted file mode 100644 index d9281eaeae..0000000000 --- a/sysdeps/i386/i686/multiarch/strrchr.S +++ /dev/null @@ -1,57 +0,0 @@ -/* Multiple versions of strrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(strrchr) - .type strrchr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strrchr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf) - HAS_ARCH_FEATURE (Slow_BSF) - jz 2f - LOAD_FUNC_GOT_EAX (__strrchr_sse2) -2: ret -END(strrchr) - -# undef ENTRY -# define ENTRY(name) \ - .type __strrchr_ia32, @function; \ - .globl __strrchr_ia32; \ - .p2align 4; \ - __strrchr_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strrchr; __GI_strrchr = __strrchr_ia32 -#endif - -#include "../../strrchr.S" diff --git a/sysdeps/i386/i686/multiarch/strspn-c.c b/sysdeps/i386/i686/multiarch/strspn-c.c deleted file mode 100644 index bea09dea71..0000000000 --- a/sysdeps/i386/i686/multiarch/strspn-c.c +++ /dev/null @@ -1,2 +0,0 @@ -#define __strspn_sse2 __strspn_ia32 -#include <sysdeps/x86_64/multiarch/strspn-c.c> diff --git a/sysdeps/i386/i686/multiarch/strspn.S b/sysdeps/i386/i686/multiarch/strspn.S deleted file mode 100644 index 1269062381..0000000000 --- a/sysdeps/i386/i686/multiarch/strspn.S +++ /dev/null @@ -1,56 +0,0 @@ -/* Multiple versions of strspn - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <config.h> -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. */ -#if IS_IN (libc) - .text -ENTRY(strspn) - .type strspn, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__strspn_ia32) - HAS_CPU_FEATURE (SSE4_2) - jz 2f - LOAD_FUNC_GOT_EAX (__strspn_sse42) -2: ret -END(strspn) - -# undef ENTRY -# define ENTRY(name) \ - .type __strspn_ia32, @function; \ - .globl __strspn_ia32; \ - .p2align 4; \ -__strspn_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strspn_ia32, .-__strspn_ia32 -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strspn; __GI_strspn = __strspn_ia32 -#endif - -#include "../../strspn.S" diff --git a/sysdeps/i386/i686/multiarch/test-multiarch.c b/sysdeps/i386/i686/multiarch/test-multiarch.c deleted file mode 100644 index 593cfec273..0000000000 --- a/sysdeps/i386/i686/multiarch/test-multiarch.c +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/test-multiarch.c> diff --git a/sysdeps/i386/i686/multiarch/varshift.c b/sysdeps/i386/i686/multiarch/varshift.c deleted file mode 100644 index 7760b966e2..0000000000 --- a/sysdeps/i386/i686/multiarch/varshift.c +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/varshift.c> diff --git a/sysdeps/i386/i686/multiarch/varshift.h b/sysdeps/i386/i686/multiarch/varshift.h deleted file mode 100644 index 7c72c70d67..0000000000 --- a/sysdeps/i386/i686/multiarch/varshift.h +++ /dev/null @@ -1 +0,0 @@ -#include <sysdeps/x86_64/multiarch/varshift.h> diff --git a/sysdeps/i386/i686/multiarch/wcschr-c.c b/sysdeps/i386/i686/multiarch/wcschr-c.c deleted file mode 100644 index 38d41d04de..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr-c.c +++ /dev/null @@ -1,22 +0,0 @@ -#include <wchar.h> - -#if IS_IN (libc) -# undef libc_hidden_weak -# define libc_hidden_weak(name) - -# undef weak_alias -# define weak_alias(name,alias) - -# ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__wcschr_ia32, __GI_wcschr, __wcschr_ia32); \ - strong_alias (__wcschr_ia32, __wcschr_ia32_1); \ - __hidden_ver1 (__wcschr_ia32_1, __GI___wcschr, __wcschr_ia32_1); -# endif -#endif - -extern __typeof (wcschr) __wcschr_ia32; - -#define WCSCHR __wcschr_ia32 -#include <wcsmbs/wcschr.c> diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S deleted file mode 100644 index 9ff6c3b8d6..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S +++ /dev/null @@ -1,219 +0,0 @@ -/* wcschr with SSE2, without using bsf 
instructions - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if IS_IN (libc) -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__wcschr_sse2) - - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - mov %ecx, %eax - punpckldq %xmm1, %xmm1 - pxor %xmm2, %xmm2 - punpckldq %xmm1, %xmm1 - - and $63, %eax - cmp $48, %eax - ja L(cross_cache) - - movdqu (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - and $-16, %ecx - jmp L(loop) - - .p2align 4 -L(cross_cache): - PUSH (%edi) - mov %ecx, %edi - mov %eax, %ecx - and $-16, %edi - and $15, %ecx - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - - sarl %cl, %edx - sarl %cl, %eax - test %eax, %eax - jz L(unaligned_no_match) - - add %edi, %ecx - POP (%edi) - - test %edx, %edx - jz L(match_case1) - test %al, 
%al - jz L(match_higth_case2) - test $15, %al - jnz L(match_case2_4) - test $15, %dl - jnz L(return_null) - lea 4(%ecx), %eax - ret - - CFI_PUSH (%edi) - - .p2align 4 -L(unaligned_no_match): - mov %edi, %ecx - POP (%edi) - - test %edx, %edx - jnz L(return_null) - - pxor %xmm2, %xmm2 - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - add $16, %ecx - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jnz L(matches) - add $16, %ecx - - movdqa (%ecx), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %edx - pmovmskb %xmm0, %eax - or %eax, %edx - jz L(loop) - - .p2align 4 -L(matches): - pmovmskb %xmm2, %edx - test %eax, %eax - jz L(return_null) - test %edx, %edx - jz L(match_case1) - - .p2align 4 -L(match_case2): - test %al, %al - jz L(match_higth_case2) - test $15, %al - jnz L(match_case2_4) - test $15, %dl - jnz L(return_null) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(match_case2_4): - mov %ecx, %eax - ret - - .p2align 4 -L(match_higth_case2): - test %dl, %dl - jnz L(return_null) - test $15, %ah - jnz L(match_case2_12) - test $15, %dh - jnz L(return_null) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(match_case2_12): - lea 8(%ecx), %eax - ret - - .p2align 4 -L(match_case1): - test %al, %al - jz L(match_higth_case1) - - test $0x01, %al - jnz L(exit0) - lea 4(%ecx), %eax - ret - - .p2align 4 -L(match_higth_case1): - test $0x01, %ah - jnz L(exit3) - lea 12(%ecx), %eax - ret - - .p2align 4 -L(exit0): - mov %ecx, %eax - ret - - .p2align 4 -L(exit3): - lea 8(%ecx), %eax - ret - - .p2align 4 -L(return_null): - xor %eax, %eax - ret - -END 
(__wcschr_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcschr.S b/sysdeps/i386/i686/multiarch/wcschr.S deleted file mode 100644 index d3c65a6436..0000000000 --- a/sysdeps/i386/i686/multiarch/wcschr.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of wcschr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__wcschr) - .type wcschr, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcschr_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcschr_sse2) -2: ret -END(__wcschr) -weak_alias (__wcschr, wcschr) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscmp-c.c b/sysdeps/i386/i686/multiarch/wcscmp-c.c deleted file mode 100644 index e3337d77e2..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscmp-c.c +++ /dev/null @@ -1,14 +0,0 @@ -#include <wchar.h> - -#define WCSCMP __wcscmp_ia32 -#ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__wcscmp_ia32, __GI___wcscmp, __wcscmp_ia32); -#endif -#undef weak_alias -#define weak_alias(name, alias) - -extern __typeof (wcscmp) __wcscmp_ia32; - -#include "wcsmbs/wcscmp.c" diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S deleted file mode 100644 index a464b58204..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S +++ /dev/null @@ -1,1018 +0,0 @@ -/* wcscmp with SSE2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) - -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define ENTRANCE PUSH(%esi); PUSH(%edi) -# define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi); -# define PARMS 4 -# define STR1 PARMS -# define STR2 STR1+4 - -/* Note: wcscmp uses signed comparison, not unsugned as in strcmp function. */ - - .text -ENTRY (__wcscmp_sse2) -/* - * This implementation uses SSE to compare up to 16 bytes at a time. -*/ - mov STR1(%esp), %edx - mov STR2(%esp), %eax - - mov (%eax), %ecx - cmp %ecx, (%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - mov 4(%eax), %ecx - cmp %ecx, 4(%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - mov 8(%eax), %ecx - cmp %ecx, 8(%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - mov 12(%eax), %ecx - cmp %ecx, 12(%edx) - jne L(neq) - test %ecx, %ecx - jz L(eq) - - ENTRANCE - add $16, %eax - add $16, %edx - - mov %eax, %esi - mov %edx, %edi - pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ - mov %al, %ch - mov %dl, %cl - and $63, %eax /* esi alignment in cache line */ - and $63, %edx /* edi alignment in cache line */ - and $15, %cl - jz L(continue_00) - cmp $16, %edx - jb L(continue_0) - cmp $32, %edx - jb L(continue_16) - cmp $48, %edx - jb L(continue_32) - -L(continue_48): - and $15, %ch - jz L(continue_48_00) - cmp $16, %eax - jb L(continue_0_48) - cmp $32, %eax - jb L(continue_16_48) - cmp $48, %eax - jb L(continue_32_48) - - .p2align 4 -L(continue_48_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne 
L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%edi), %xmm1 - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_48_48) - -L(continue_0): - and $15, %ch - jz L(continue_0_00) - cmp $16, %eax - jb L(continue_0_0) - cmp $32, %eax - jb L(continue_0_16) - cmp $48, %eax - jb L(continue_0_32) - - .p2align 4 -L(continue_0_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - mov 48(%esi), %ecx - cmp %ecx, 48(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 52(%esi), %ecx - cmp %ecx, 52(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 56(%esi), %ecx - cmp %ecx, 56(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 60(%esi), %ecx - cmp %ecx, 60(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - add $64, %esi - add $64, %edi - jmp L(continue_0_48) - - .p2align 4 -L(continue_00): - and $15, %ch - jz L(continue_00_00) - cmp $16, %eax - jb L(continue_00_0) - cmp $32, %eax - jb L(continue_00_16) - cmp $48, %eax - jb L(continue_00_32) - - .p2align 4 -L(continue_00_48): - pcmpeqd (%edi), %xmm0 - mov (%edi), %eax - pmovmskb %xmm0, %ecx - test %ecx, %ecx - jnz L(less4_double_words1) - - cmp (%esi), %eax - jne L(nequal) - - mov 4(%edi), %eax - cmp 4(%esi), %eax - jne L(nequal) - - mov 8(%edi), %eax - cmp 8(%esi), %eax - jne L(nequal) - - mov 12(%edi), %eax - cmp 12(%esi), %eax - jne L(nequal) - - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_32): - and $15, %ch - jz L(continue_32_00) - cmp $16, %eax - jb L(continue_0_32) - cmp $32, %eax - jb L(continue_16_32) - cmp $48, %eax - jb L(continue_32_32) - - .p2align 4 -L(continue_32_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 16(%esi), %ecx - cmp %ecx, 16(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 20(%esi), %ecx - cmp %ecx, 20(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 24(%esi), %ecx - cmp %ecx, 24(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 28(%esi), %ecx - cmp %ecx, 28(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%edi), %xmm1 - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results */ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_32_48) - - .p2align 4 -L(continue_16): - and $15, %ch - jz L(continue_16_00) - cmp $16, %eax - jb L(continue_0_16) - cmp $32, %eax - jb L(continue_16_16) - cmp $48, %eax - jb L(continue_16_32) - - .p2align 4 -L(continue_16_48): - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - mov 32(%esi), %ecx - cmp %ecx, 32(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 36(%esi), %ecx - cmp %ecx, 36(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 40(%esi), %ecx - cmp %ecx, 40(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 44(%esi), %ecx - cmp %ecx, 44(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - movdqu 48(%edi), %xmm1 - movdqu 48(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_16_48) - - .p2align 4 -L(continue_00_00): - movdqa (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqa 16(%edi), %xmm3 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqa 32(%edi), %xmm5 - pcmpeqd %xmm5, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm5 /* packed sub of comparison results*/ - pmovmskb %xmm5, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqa 48(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_00_00) - - .p2align 4 -L(continue_00_32): - movdqu (%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_00_16): - movdqu (%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_00_0): - movdqu (%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? 
*/ - pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm2 /* packed sub of comparison results*/ - pmovmskb %xmm2, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %esi - add $48, %edi - jmp L(continue_00_48) - - .p2align 4 -L(continue_48_00): - pcmpeqd (%esi), %xmm0 - mov (%edi), %eax - pmovmskb %xmm0, %ecx - test %ecx, %ecx - jnz L(less4_double_words1) - - cmp (%esi), %eax - jne L(nequal) - - mov 4(%edi), %eax - cmp 4(%esi), %eax - jne L(nequal) - - mov 8(%edi), %eax - cmp 8(%esi), %eax - jne L(nequal) - - mov 12(%edi), %eax - cmp 12(%esi), %eax - jne L(nequal) - - movdqu 16(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - movdqu 48(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_48) - - add $64, %esi - add $64, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_32_00): - movdqu (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_16_00): - movdqu (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_0_00): - movdqu (%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %esi - add $48, %edi - jmp L(continue_48_00) - - .p2align 4 -L(continue_32_32): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_48_48) - - .p2align 4 -L(continue_16_16): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm3 - movdqu 16(%esi), %xmm4 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_48_48) - - .p2align 4 -L(continue_0_0): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm3 - movdqu 16(%esi), %xmm4 - pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm3 /* packed sub of comparison results*/ - pmovmskb %xmm3, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - movdqu 32(%edi), %xmm1 - movdqu 32(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_32) - - add $48, %esi - add $48, %edi - jmp L(continue_48_48) - - .p2align 4 -L(continue_0_16): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - movdqu 16(%edi), %xmm1 - movdqu 16(%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? 
*/ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words_16) - - add $32, %esi - add $32, %edi - jmp L(continue_32_48) - - .p2align 4 -L(continue_0_32): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_16_48) - - .p2align 4 -L(continue_16_32): - movdqu (%edi), %xmm1 - movdqu (%esi), %xmm2 - pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ - pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ - psubb %xmm0, %xmm1 /* packed sub of comparison results*/ - pmovmskb %xmm1, %edx - sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */ - jnz L(less4_double_words) - - add $16, %esi - add $16, %edi - jmp L(continue_32_48) - - .p2align 4 -L(less4_double_words1): - cmp (%esi), %eax - jne L(nequal) - test %eax, %eax - jz L(equal) - - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 8(%esi), %ecx - cmp %ecx, 8(%edi) - jne L(nequal) - test %ecx, %ecx - jz L(equal) - - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - xor %eax, %eax - RETURN - - .p2align 4 -L(less4_double_words): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words) - and $15, %dl - jz L(second_double_word) - mov (%esi), %ecx - cmp %ecx, (%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word): - mov 4(%esi), %ecx - cmp %ecx, 4(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words): - and $15, %dh - jz L(fourth_double_word) - mov 8(%esi), %ecx - 
cmp %ecx, 8(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word): - mov 12(%esi), %ecx - cmp %ecx, 12(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(less4_double_words_16): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_16) - and $15, %dl - jz L(second_double_word_16) - mov 16(%esi), %ecx - cmp %ecx, 16(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word_16): - mov 20(%esi), %ecx - cmp %ecx, 20(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words_16): - and $15, %dh - jz L(fourth_double_word_16) - mov 24(%esi), %ecx - cmp %ecx, 24(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word_16): - mov 28(%esi), %ecx - cmp %ecx, 28(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(less4_double_words_32): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_32) - and $15, %dl - jz L(second_double_word_32) - mov 32(%esi), %ecx - cmp %ecx, 32(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word_32): - mov 36(%esi), %ecx - cmp %ecx, 36(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words_32): - and $15, %dh - jz L(fourth_double_word_32) - mov 40(%esi), %ecx - cmp %ecx, 40(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word_32): - mov 44(%esi), %ecx - cmp %ecx, 44(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(less4_double_words_48): - xor %eax, %eax - test %dl, %dl - jz L(next_two_double_words_48) - and $15, %dl - jz L(second_double_word_48) - mov 48(%esi), %ecx - cmp %ecx, 48(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(second_double_word_48): - mov 52(%esi), %ecx - cmp %ecx, 52(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(next_two_double_words_48): - and $15, %dh - jz L(fourth_double_word_48) - mov 56(%esi), %ecx - cmp %ecx, 56(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(fourth_double_word_48): - mov 60(%esi), %ecx - cmp %ecx, 60(%edi) - jne L(nequal) - RETURN - - .p2align 4 -L(nequal): - mov $1, %eax - jg L(return) - neg 
%eax - RETURN - - .p2align 4 -L(return): - RETURN - - .p2align 4 -L(equal): - xorl %eax, %eax - RETURN - - CFI_POP (%edi) - CFI_POP (%esi) - - .p2align 4 -L(neq): - mov $1, %eax - jg L(neq_bigger) - neg %eax - -L(neq_bigger): - ret - - .p2align 4 -L(eq): - xorl %eax, %eax - ret - -END (__wcscmp_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscmp.S b/sysdeps/i386/i686/multiarch/wcscmp.S deleted file mode 100644 index 7118bdd4db..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscmp.S +++ /dev/null @@ -1,39 +0,0 @@ -/* Multiple versions of wcscmp - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc and for the - DSO. In static binaries, we need wcscmp before the initialization - happened. 
*/ -#if IS_IN (libc) - .text -ENTRY(__wcscmp) - .type __wcscmp, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcscmp_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcscmp_sse2) -2: ret -END(__wcscmp) -weak_alias (__wcscmp, wcscmp) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscpy-c.c b/sysdeps/i386/i686/multiarch/wcscpy-c.c deleted file mode 100644 index fb3000392b..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscpy-c.c +++ /dev/null @@ -1,5 +0,0 @@ -#if IS_IN (libc) -# define wcscpy __wcscpy_ia32 -#endif - -#include "wcsmbs/wcscpy.c" diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S deleted file mode 100644 index 6280ba92ab..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S +++ /dev/null @@ -1,600 +0,0 @@ -/* wcscpy with SSSE3 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) -# include <sysdep.h> - -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 4 -# define RETURN POP (%edi); ret; CFI_PUSH (%edi) -# define STR1 PARMS -# define STR2 STR1+4 -# define LEN STR2+4 - - atom_text_section -ENTRY (__wcscpy_ssse3) - mov STR1(%esp), %edx - mov STR2(%esp), %ecx - - cmp $0, (%ecx) - jz L(ExitTail4) - cmp $0, 4(%ecx) - jz L(ExitTail8) - cmp $0, 8(%ecx) - jz L(ExitTail12) - cmp $0, 12(%ecx) - jz L(ExitTail16) - - PUSH (%edi) - mov %edx, %edi - PUSH (%esi) - lea 16(%ecx), %esi - - and $-16, %esi - - pxor %xmm0, %xmm0 - pcmpeqd (%esi), %xmm0 - movdqu (%ecx), %xmm1 - movdqu %xmm1, (%edx) - - pmovmskb %xmm0, %eax - sub %ecx, %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - mov %edx, %eax - lea 16(%edx), %edx - and $-16, %edx - sub %edx, %eax - - sub %eax, %ecx - mov %ecx, %eax - and $0xf, %eax - mov $0, %esi - - jz L(Align16Both) - cmp $4, %eax - je L(Shl4) - cmp $8, %eax - je L(Shl8) - jmp L(Shl12) - -L(Align16Both): - movaps (%ecx), %xmm1 - movaps 16(%ecx), %xmm2 - movaps %xmm1, (%edx) - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqd %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm4 - movaps %xmm3, (%edx, %esi) - pcmpeqd %xmm4, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm1 - movaps %xmm4, (%edx, %esi) - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm2 - movaps %xmm1, (%edx, %esi) - pcmpeqd %xmm2, %xmm0 
- pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps 16(%ecx, %esi), %xmm3 - movaps %xmm2, (%edx, %esi) - pcmpeqd %xmm3, %xmm0 - pmovmskb %xmm0, %eax - lea 16(%esi), %esi - - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - movaps %xmm3, (%edx, %esi) - mov %ecx, %eax - lea 16(%ecx, %esi), %ecx - and $-0x40, %ecx - sub %ecx, %eax - sub %eax, %edx - - mov $-0x40, %esi - -L(Aligned64Loop): - movaps (%ecx), %xmm2 - movaps 32(%ecx), %xmm3 - movaps %xmm2, %xmm4 - movaps 16(%ecx), %xmm5 - movaps %xmm3, %xmm6 - movaps 48(%ecx), %xmm7 - pminub %xmm5, %xmm2 - pminub %xmm7, %xmm3 - pminub %xmm2, %xmm3 - lea 64(%edx), %edx - pcmpeqd %xmm0, %xmm3 - lea 64(%ecx), %ecx - pmovmskb %xmm3, %eax - - test %eax, %eax - jnz L(Aligned64Leave) - movaps %xmm4, -64(%edx) - movaps %xmm5, -48(%edx) - movaps %xmm6, -32(%edx) - movaps %xmm7, -16(%edx) - jmp L(Aligned64Loop) - -L(Aligned64Leave): - pcmpeqd %xmm4, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - jnz L(CopyFrom1To16Bytes) - - pcmpeqd %xmm5, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm4, -64(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - pcmpeqd %xmm6, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm5, -48(%edx) - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - movaps %xmm6, -32(%edx) - pcmpeqd %xmm7, %xmm0 - pmovmskb %xmm0, %eax - test %eax, %eax - lea 16(%esi), %esi - jnz L(CopyFrom1To16Bytes) - - mov $-0x40, %esi - movaps %xmm7, -16(%edx) - jmp L(Aligned64Loop) - - .p2align 4 -L(Shl4): - movaps -4(%ecx), %xmm1 - movaps 12(%ecx), %xmm2 -L(Shl4Start): - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm1 - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm3, %xmm2 - movaps %xmm2, (%edx) - 
movaps 28(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 28(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - - test %eax, %eax - jnz L(Shl4LoopExit) - - palignr $4, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 28(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -12(%ecx), %ecx - sub %eax, %edx - - movaps -4(%ecx), %xmm1 - -L(Shl4LoopStart): - movaps 12(%ecx), %xmm2 - movaps 28(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 44(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 60(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqd %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $4, %xmm4, %xmm5 - test %eax, %eax - palignr $4, %xmm3, %xmm4 - jnz L(Shl4Start) - - palignr $4, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $4, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl4LoopStart) - -L(Shl4LoopExit): - movlpd (%ecx), %xmm0 - movl 8(%ecx), %esi - movlpd %xmm0, (%edx) - movl %esi, 8(%edx) - POP (%esi) - add $12, %edx - add $12, %ecx - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit4) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(Shl8): - movaps -8(%ecx), %xmm1 - movaps 8(%ecx), %xmm2 -L(Shl8Start): - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm1 - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm3, 
%xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 24(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - - test %eax, %eax - jnz L(Shl8LoopExit) - - palignr $8, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 24(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -8(%ecx), %ecx - sub %eax, %edx - - movaps -8(%ecx), %xmm1 - -L(Shl8LoopStart): - movaps 8(%ecx), %xmm2 - movaps 24(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 40(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 56(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqd %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $8, %xmm4, %xmm5 - test %eax, %eax - palignr $8, %xmm3, %xmm4 - jnz L(Shl8Start) - - palignr $8, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $8, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl8LoopStart) - -L(Shl8LoopExit): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - POP (%esi) - add $8, %edx - add $8, %ecx - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit4) - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(Shl12): - movaps -12(%ecx), %xmm1 - movaps 4(%ecx), %xmm2 -L(Shl12Start): - pcmpeqd %xmm2, %xmm0 - pmovmskb %xmm0, %eax - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm1 - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm3, %xmm2 - 
movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - movaps %xmm2, %xmm3 - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm1, %xmm2 - movaps %xmm2, (%edx) - movaps 20(%ecx), %xmm2 - - pcmpeqd %xmm2, %xmm0 - lea 16(%edx), %edx - pmovmskb %xmm0, %eax - lea 16(%ecx), %ecx - - test %eax, %eax - jnz L(Shl12LoopExit) - - palignr $12, %xmm3, %xmm2 - movaps %xmm2, (%edx) - lea 20(%ecx), %ecx - lea 16(%edx), %edx - - mov %ecx, %eax - and $-0x40, %ecx - sub %ecx, %eax - lea -4(%ecx), %ecx - sub %eax, %edx - - movaps -12(%ecx), %xmm1 - -L(Shl12LoopStart): - movaps 4(%ecx), %xmm2 - movaps 20(%ecx), %xmm3 - movaps %xmm3, %xmm6 - movaps 36(%ecx), %xmm4 - movaps %xmm4, %xmm7 - movaps 52(%ecx), %xmm5 - pminub %xmm2, %xmm6 - pminub %xmm5, %xmm7 - pminub %xmm6, %xmm7 - pcmpeqd %xmm0, %xmm7 - pmovmskb %xmm7, %eax - movaps %xmm5, %xmm7 - palignr $12, %xmm4, %xmm5 - test %eax, %eax - palignr $12, %xmm3, %xmm4 - jnz L(Shl12Start) - - palignr $12, %xmm2, %xmm3 - lea 64(%ecx), %ecx - palignr $12, %xmm1, %xmm2 - movaps %xmm7, %xmm1 - movaps %xmm5, 48(%edx) - movaps %xmm4, 32(%edx) - movaps %xmm3, 16(%edx) - movaps %xmm2, (%edx) - lea 64(%edx), %edx - jmp L(Shl12LoopStart) - -L(Shl12LoopExit): - movl (%ecx), %esi - movl %esi, (%edx) - mov $4, %esi - - .p2align 4 -L(CopyFrom1To16Bytes): - add %esi, %edx - add %esi, %ecx - - POP (%esi) - test %al, %al - jz L(ExitHigh) - test $0x01, %al - jnz L(Exit4) -L(Exit8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edi, %eax - RETURN - - .p2align 4 -L(ExitHigh): - test $0x01, %ah - jnz L(Exit12) -L(Exit16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - movl %edi, %eax - RETURN - - .p2align 4 -L(Exit4): - movl (%ecx), %eax - movl %eax, (%edx) - movl %edi, %eax - RETURN - - .p2align 4 -L(Exit12): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) - movl %edi, %eax - RETURN - -CFI_POP (%edi) - - .p2align 4 
-L(ExitTail4): - movl (%ecx), %eax - movl %eax, (%edx) - movl %edx, %eax - ret - - .p2align 4 -L(ExitTail8): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl %edx, %eax - ret - - .p2align 4 -L(ExitTail12): - movlpd (%ecx), %xmm0 - movlpd %xmm0, (%edx) - movl 8(%ecx), %eax - movl %eax, 8(%edx) - movl %edx, %eax - ret - - .p2align 4 -L(ExitTail16): - movdqu (%ecx), %xmm0 - movdqu %xmm0, (%edx) - movl %edx, %eax - ret - -END (__wcscpy_ssse3) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcscpy.S b/sysdeps/i386/i686/multiarch/wcscpy.S deleted file mode 100644 index cfc97dd87c..0000000000 --- a/sysdeps/i386/i686/multiarch/wcscpy.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of wcscpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. 
*/ -#if IS_IN (libc) - .text -ENTRY(wcscpy) - .type wcscpy, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcscpy_ia32) - HAS_CPU_FEATURE (SSSE3) - jz 2f - LOAD_FUNC_GOT_EAX (__wcscpy_ssse3) -2: ret -END(wcscpy) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcslen-c.c b/sysdeps/i386/i686/multiarch/wcslen-c.c deleted file mode 100644 index a335dc0f7e..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen-c.c +++ /dev/null @@ -1,9 +0,0 @@ -#include <wchar.h> - -#if IS_IN (libc) -# define WCSLEN __wcslen_ia32 -#endif - -extern __typeof (wcslen) __wcslen_ia32; - -#include "wcsmbs/wcslen.c" diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S deleted file mode 100644 index bd3fc4c79b..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S +++ /dev/null @@ -1,193 +0,0 @@ -/* wcslen with SSE2 - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) -# include <sysdep.h> -# define STR 4 - - .text -ENTRY (__wcslen_sse2) - mov STR(%esp), %edx - - cmp $0, (%edx) - jz L(exit_tail0) - cmp $0, 4(%edx) - jz L(exit_tail1) - cmp $0, 8(%edx) - jz L(exit_tail2) - cmp $0, 12(%edx) - jz L(exit_tail3) - cmp $0, 16(%edx) - jz L(exit_tail4) - cmp $0, 20(%edx) - jz L(exit_tail5) - cmp $0, 24(%edx) - jz L(exit_tail6) - cmp $0, 28(%edx) - jz L(exit_tail7) - - pxor %xmm0, %xmm0 - - lea 32(%edx), %eax - lea 16(%edx), %ecx - and $-16, %eax - - pcmpeqd (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm1 - pmovmskb %xmm1, %edx - pxor %xmm2, %xmm2 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm2 - pmovmskb %xmm2, %edx - pxor %xmm3, %xmm3 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqd (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - and $-0x40, %eax - - .p2align 4 -L(aligned_64_loop): - movaps (%eax), %xmm0 - movaps 16(%eax), %xmm1 - movaps 32(%eax), %xmm2 - movaps 48(%eax), %xmm6 - - pminub %xmm1, %xmm0 - pminub %xmm6, %xmm2 - pminub %xmm0, %xmm2 - pcmpeqd %xmm3, %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 64(%eax), %eax - jz L(aligned_64_loop) - - pcmpeqd -64(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 48(%ecx), %ecx - jnz L(exit) - - pcmpeqd %xmm1, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqd -32(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqd %xmm6, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - jmp L(aligned_64_loop) - - .p2align 4 -L(exit): - sub %ecx, %eax - shr $2, %eax - test %dl, %dl - jz L(exit_high) - - mov %dl, %cl - and $15, %cl - jz L(exit_1) - ret - - .p2align 4 -L(exit_high): - mov %dh, %ch - and $15, %ch - jz L(exit_3) - add $2, %eax - ret - - .p2align 4 
-L(exit_1): - add $1, %eax - ret - - .p2align 4 -L(exit_3): - add $3, %eax - ret - - .p2align 4 -L(exit_tail0): - xor %eax, %eax - ret - - .p2align 4 -L(exit_tail1): - mov $1, %eax - ret - - .p2align 4 -L(exit_tail2): - mov $2, %eax - ret - - .p2align 4 -L(exit_tail3): - mov $3, %eax - ret - - .p2align 4 -L(exit_tail4): - mov $4, %eax - ret - - .p2align 4 -L(exit_tail5): - mov $5, %eax - ret - - .p2align 4 -L(exit_tail6): - mov $6, %eax - ret - - .p2align 4 -L(exit_tail7): - mov $7, %eax - ret - -END (__wcslen_sse2) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S deleted file mode 100644 index 6ef9b6e7b5..0000000000 --- a/sysdeps/i386/i686/multiarch/wcslen.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of wcslen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) - .text -ENTRY(__wcslen) - .type __wcslen, @gnu_indirect_function - LOAD_GOT_AND_RTLD_GLOBAL_RO - LOAD_FUNC_GOT_EAX (__wcslen_ia32) - HAS_CPU_FEATURE (SSE2) - jz 2f - LOAD_FUNC_GOT_EAX (__wcslen_sse2) -2: ret -END(__wcslen) - -weak_alias(__wcslen, wcslen) -#endif diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-c.c b/sysdeps/i386/i686/multiarch/wcsrchr-c.c deleted file mode 100644 index 8d8a335b5b..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr-c.c +++ /dev/null @@ -1,5 +0,0 @@ -#if IS_IN (libc) -# define wcsrchr __wcsrchr_ia32 -#endif - -#include "wcsmbs/wcsrchr.c" diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S deleted file mode 100644 index 1a9b60e55e..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S +++ /dev/null @@ -1,354 +0,0 @@ -/* wcsrchr with SSE2, without using bsf instructions. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if IS_IN (libc) -# include <sysdep.h> -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) - -# define PARMS 8 -# define ENTRANCE PUSH (%edi); -# define RETURN POP (%edi); ret; CFI_PUSH (%edi); -# define STR1 PARMS -# define STR2 STR1+4 - - atom_text_section -ENTRY (__wcsrchr_sse2) - - ENTRANCE - mov STR1(%esp), %ecx - movd STR2(%esp), %xmm1 - - mov %ecx, %edi - punpckldq %xmm1, %xmm1 - pxor %xmm2, %xmm2 - punpckldq %xmm1, %xmm1 - -/* ECX has OFFSET. */ - and $63, %ecx - cmp $48, %ecx - ja L(crosscache) - -/* unaligned string. */ - movdqu (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - pcmpeqd %xmm1, %xmm0 -/* Find where NULL is. */ - pmovmskb %xmm2, %ecx -/* Check if there is a match. */ - pmovmskb %xmm0, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match1) - - test %ecx, %ecx - jnz L(return_null) - - and $-16, %edi - - PUSH (%esi) - - xor %edx, %edx - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(unaligned_match1): - test %ecx, %ecx - jnz L(prolog_find_zero_1) - - PUSH (%esi) - -/* Save current match */ - mov %eax, %edx - mov %edi, %esi - and $-16, %edi - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(crosscache): -/* Hancle unaligned string. */ - and $15, %ecx - and $-16, %edi - pxor %xmm3, %xmm3 - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm3 - pcmpeqd %xmm1, %xmm0 -/* Find where NULL is. */ - pmovmskb %xmm3, %edx -/* Check if there is a match. */ - pmovmskb %xmm0, %eax -/* Remove the leading bytes. 
*/ - shr %cl, %edx - shr %cl, %eax - add $16, %edi - - test %eax, %eax - jnz L(unaligned_match) - - test %edx, %edx - jnz L(return_null) - - PUSH (%esi) - - xor %edx, %edx - jmp L(loop) - - CFI_POP (%esi) - - .p2align 4 -L(unaligned_match): - test %edx, %edx - jnz L(prolog_find_zero) - - PUSH (%esi) - - mov %eax, %edx - lea (%edi, %ecx), %esi - -/* Loop start on aligned string. */ - .p2align 4 -L(loop): - movdqa (%edi), %xmm0 - pcmpeqd %xmm0, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm0 - pmovmskb %xmm2, %ecx - pmovmskb %xmm0, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm3 - pcmpeqd %xmm3, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm3 - pmovmskb %xmm2, %ecx - pmovmskb %xmm3, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm4 - pcmpeqd %xmm4, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm4 - pmovmskb %xmm2, %ecx - pmovmskb %xmm4, %eax - or %eax, %ecx - jnz L(matches) - - movdqa (%edi), %xmm5 - pcmpeqd %xmm5, %xmm2 - add $16, %edi - pcmpeqd %xmm1, %xmm5 - pmovmskb %xmm2, %ecx - pmovmskb %xmm5, %eax - or %eax, %ecx - jz L(loop) - - .p2align 4 -L(matches): - test %eax, %eax - jnz L(match) -L(return_value): - test %edx, %edx - jz L(return_null_1) - mov %edx, %eax - mov %esi, %edi - - POP (%esi) - - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(return_null_1): - POP (%esi) - - xor %eax, %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(match): - pmovmskb %xmm2, %ecx - test %ecx, %ecx - jnz L(find_zero) -/* save match info */ - mov %eax, %edx - mov %edi, %esi - jmp L(loop) - - .p2align 4 -L(find_zero): - test %cl, %cl - jz L(find_zero_in_third_or_fourth_wchar) - test $15, %cl - jz L(find_zero_in_second_wchar) - and $1, %eax - jz L(return_value) - - POP (%esi) - - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_second_wchar): - and $1 << 5 - 1, %eax - jz L(return_value) - - POP (%esi) - - test 
$15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_third_or_fourth_wchar): - test $15, %ch - jz L(find_zero_in_fourth_wchar) - and $1 << 9 - 1, %eax - jz L(return_value) - - POP (%esi) - - test %ah, %ah - jnz L(match_third_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(find_zero_in_fourth_wchar): - - POP (%esi) - - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - CFI_PUSH (%esi) - - .p2align 4 -L(match_second_wchar): - lea -12(%edi), %eax - RETURN - - .p2align 4 -L(match_third_or_fourth_wchar): - test $15 << 4, %ah - jnz L(match_fourth_wchar) - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_third_wchar): - lea -8(%edi), %eax - RETURN - - .p2align 4 -L(match_fourth_wchar): - lea -4(%edi), %eax - RETURN - - .p2align 4 -L(return_null): - xor %eax, %eax - RETURN - - .p2align 4 -L(prolog_find_zero): - add %ecx, %edi - mov %edx, %ecx -L(prolog_find_zero_1): - test %cl, %cl - jz L(prolog_find_zero_in_third_or_fourth_wchar) - test $15, %cl - jz L(prolog_find_zero_in_second_wchar) - and $1, %eax - jz L(return_null) - - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_second_wchar): - and $1 << 5 - 1, %eax - jz L(return_null) - - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_third_or_fourth_wchar): - test $15, %ch - jz L(prolog_find_zero_in_fourth_wchar) - and $1 << 9 - 1, %eax - jz L(return_null) - - test %ah, %ah - jnz L(match_third_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - - .p2align 4 -L(prolog_find_zero_in_fourth_wchar): - test %ah, %ah - jnz L(match_third_or_fourth_wchar) - test $15 << 4, %al - jnz L(match_second_wchar) - lea -16(%edi), %eax - RETURN - -END (__wcsrchr_sse2) -#endif diff --git 
a/sysdeps/i386/i686/multiarch/wcsrchr.S b/sysdeps/i386/i686/multiarch/wcsrchr.S deleted file mode 100644 index cf67333995..0000000000 --- a/sysdeps/i386/i686/multiarch/wcsrchr.S +++ /dev/null @@ -1,35 +0,0 @@ -/* Multiple versions of wcsrchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - -#if IS_IN (libc) /* the selector below is assembled only when IS_IN (libc) holds */ - .text -ENTRY(wcsrchr) - .type wcsrchr, @gnu_indirect_function /* wcsrchr is an indirect-function (IFUNC) symbol; this body chooses the implementation */ - LOAD_GOT_AND_RTLD_GLOBAL_RO /* NOTE(review): macro defined elsewhere; judging by its name it prepares the GOT and read-only rtld data used by the macros below; confirm */ - LOAD_FUNC_GOT_EAX (__wcsrchr_ia32) /* default choice is the ia32 version (presumably its address is placed in %eax; confirm in the macro) */ - HAS_CPU_FEATURE (SSE2) - jz 2f /* ZF set after the SSE2 check: keep the default and go straight to the return */ - LOAD_FUNC_GOT_EAX (__wcsrchr_sse2) /* otherwise replace the choice with the SSE2 version */ -2: ret -END(wcsrchr) -#endif diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-c.c b/sysdeps/i386/i686/multiarch/wmemcmp-c.c deleted file mode 100644 index 75ab4b94c1..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp-c.c +++ /dev/null @@ -1,9 +0,0 @@ -#include <wchar.h> - -#if IS_IN (libc) -# define WMEMCMP __wmemcmp_ia32 /* presumably the name under which the generic source included below emits its function; confirm in wcsmbs/wmemcmp.c */ -#endif - -extern __typeof (wmemcmp) __wmemcmp_ia32; /* declares __wmemcmp_ia32 with exactly the type of wmemcmp */ - -#include "wcsmbs/wmemcmp.c" diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S b/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S deleted file mode 100644 index 1a857c7e21..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_WMEMCMP 1 -#define MEMCMP __wmemcmp_sse4_2 /* both macros are set for the included memcmp-sse4.S; presumably they switch it to wide-character comparison under this symbol name; confirm there */ - -#include "memcmp-sse4.S" diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S b/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S deleted file mode 100644 index a41ef95fc1..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_WMEMCMP 1 -#define MEMCMP __wmemcmp_ssse3 /* same arrangement as the SSE4 wrapper: the macros are set for the included memcmp-ssse3.S; confirm their effect there */ - -#include "memcmp-ssse3.S" diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S deleted file mode 100644 index 1b9a54a413..0000000000 --- a/sysdeps/i386/i686/multiarch/wmemcmp.S +++ /dev/null @@ -1,40 +0,0 @@ -/* Multiple versions of wmemcmp - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in libc. */ - -#if IS_IN (libc) - .text -ENTRY(wmemcmp) - .type wmemcmp, @gnu_indirect_function /* wmemcmp is an indirect-function (IFUNC) symbol; this body chooses the implementation */ - LOAD_GOT_AND_RTLD_GLOBAL_RO /* NOTE(review): macro defined elsewhere; judging by its name it prepares the GOT and read-only rtld data used by the macros below; confirm */ - LOAD_FUNC_GOT_EAX (__wmemcmp_ia32) /* default choice is the ia32 version (presumably its address is placed in %eax; confirm in the macro) */ - HAS_CPU_FEATURE (SSSE3) - jz 2f /* ZF set after the SSSE3 check: keep the default; the SSE4_2 check below is then never reached */ - LOAD_FUNC_GOT_EAX (__wmemcmp_ssse3) /* first upgrade: the SSSE3 version */ - HAS_CPU_FEATURE (SSE4_2) - jz 2f /* ZF set after the SSE4_2 check: stay with the SSSE3 version */ - LOAD_FUNC_GOT_EAX (__wmemcmp_sse4_2) /* second upgrade: the SSE4.2 version, selected only when both checks fall through */ -2: ret -END(wmemcmp) -#endif diff --git a/sysdeps/i386/i686/nptl/tls.h b/sysdeps/i386/i686/nptl/tls.h deleted file mode 100644 index 5b527af9d3..0000000000 --- a/sysdeps/i386/i686/nptl/tls.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _TLS_H /* no matching #define in this file; presumably the <tls.h> reached through #include_next below defines _TLS_H; confirm */ - -/* Additional definitions for <tls.h> on i686 and up. */ - - -/* Macros to load from and store into segment registers. We can use - the 32-bit instructions. TLS_GET_GS() is a statement expression that - evaluates to the current %gs value as an int; TLS_SET_GS(val) moves - VAL into %gs. Both use the "q" register constraint for the operand. */ -#define TLS_GET_GS() \ - ({ int __seg; __asm ("movl %%gs, %0" : "=q" (__seg)); __seg; }) -#define TLS_SET_GS(val) \ - __asm ("movl %0, %%gs" :: "q" (val)) - - -/* Get the full set of definitions. */ -#include_next <tls.h> - -#endif /* tls.h */ diff --git a/sysdeps/i386/i686/pthread_spin_trylock.S b/sysdeps/i386/i686/pthread_spin_trylock.S deleted file mode 100644 index ce9c94d41a..0000000000 --- a/sysdeps/i386/i686/pthread_spin_trylock.S +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (C) 2002-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define HAVE_CMOV 1 /* set before including the generic i386 source; presumably it enables a cmov-based path there; confirm in sysdeps/i386/pthread_spin_trylock.S */ -#include <sysdeps/i386/pthread_spin_trylock.S> diff --git a/sysdeps/i386/i686/stack-aliasing.h b/sysdeps/i386/i686/stack-aliasing.h deleted file mode 100644 index 9b5a1b0d47..0000000000 --- a/sysdeps/i386/i686/stack-aliasing.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Define macros for stack address aliasing issues for NPTL. 
i686 version. - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* What is useful is to avoid the 64k aliasing problem which reliably - happens if all stacks use sizes which are a multiple of 64k. Tell - the stack allocator to disturb this by allocating one more page if - necessary. The value is the aliasing distance in bytes (65536 = 64k). */ -#define MULTI_PAGE_ALIASING 65536 diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S deleted file mode 100644 index 1ae305912e..0000000000 --- a/sysdeps/i386/i686/strcmp.S +++ /dev/null @@ -1,52 +0,0 @@ -/* Highly optimized version for ix86, x>=6. - Copyright (C) 1999-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include "asm-syntax.h" - -#define PARMS 4 /* no space for saved regs */ -#define STR1 PARMS /* %esp-relative offset of the first string argument */ -#define STR2 STR1+4 /* second string argument, one 4-byte slot further up */ - - .text -ENTRY (strcmp) - - movl STR1(%esp), %ecx /* %ecx walks the first string */ - movl STR2(%esp), %edx /* %edx walks the second string */ - -L(oop): movb (%ecx), %al /* fetch the next byte of the first string */ - cmpb (%edx), %al /* compare with the second string's byte; CF is set when %al is below it (unsigned) */ - jne L(neq) - incl %ecx - incl %edx - testb %al, %al /* bytes were equal; keep looping unless that byte was the terminating NUL */ - jnz L(oop) - - xorl %eax, %eax /* equal up to and including the NUL: result is 0 */ - /* when strings are equal, pointers rest one beyond - the end of the NUL terminators. */ - ret - -L(neq): movl $1, %eax /* assume the first string is greater; movl leaves the flags from cmpb untouched */ - movl $-1, %ecx - cmovbl %ecx, %eax /* carry set by cmpb (first byte below second): result becomes -1 */ - - ret -END (strcmp) -libc_hidden_builtin_def (strcmp) diff --git a/sysdeps/i386/i686/tst-stack-align.h b/sysdeps/i386/i686/tst-stack-align.h deleted file mode 100644 index 51f03fe77b..0000000000 --- a/sysdeps/i386/i686/tst-stack-align.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright (C) 2003-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <stdio.h> -#include <stdint.h> -#ifndef __SSE__ /* without SSE there is no __m128 to test: defer to the next tst-stack-align.h on the include path */ -#include_next <tst-stack-align.h> -#else -#include <xmmintrin.h> - -#define TEST_STACK_ALIGN() /* statement expression: prints the address and required alignment of three locals and evaluates to 0 if all are aligned, 1 otherwise */ \ - ({ \ - __m128 _m; /* left uninitialized; only its address is examined */ \ - double _d = 12.0; \ - long double _ld = 15.0; \ - int _ret = 0; \ - printf ("__m128: %p %zu\n", &_m, __alignof (__m128)); /* NOTE(review): %p formally expects void *; the pointers here are passed without a cast */ \ - if ((((uintptr_t) &_m) & (__alignof (__m128) - 1)) != 0) /* any low address bit below the alignment set means misaligned */ \ - _ret = 1; \ - \ - printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \ - if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \ - _ret = 1; \ - \ - printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \ - if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \ - _ret = 1; \ - _ret; /* value of the whole expression */ \ - }) -#endif |