summaryrefslogtreecommitdiff
path: root/sysdeps/i386/i686
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686')
-rw-r--r--sysdeps/i386/i686/Makefile12
-rw-r--r--sysdeps/i386/i686/add_n.S110
-rw-r--r--sysdeps/i386/i686/bcopy.S3
-rw-r--r--sysdeps/i386/i686/bzero.S4
-rw-r--r--sysdeps/i386/i686/dl-hash.h79
-rw-r--r--sysdeps/i386/i686/ffs.c48
-rw-r--r--sysdeps/i386/i686/fpu/e_log.S29
-rw-r--r--sysdeps/i386/i686/fpu/e_logf.S30
-rw-r--r--sysdeps/i386/i686/fpu/e_logl.S94
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/Makefile4
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S22
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S325
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_expf.c37
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/libm-test-ulps2188
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name1
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S553
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_cosf.c29
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S586
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sincosf.c30
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S566
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/s_sinf.c28
-rw-r--r--sysdeps/i386/i686/fpu/s_fmax.S39
-rw-r--r--sysdeps/i386/i686/fpu/s_fmaxf.S39
-rw-r--r--sysdeps/i386/i686/fpu/s_fmaxl.S58
-rw-r--r--sysdeps/i386/i686/fpu/s_fmin.S37
-rw-r--r--sysdeps/i386/i686/fpu/s_fminf.S37
-rw-r--r--sysdeps/i386/i686/fpu/s_fminl.S58
-rw-r--r--sysdeps/i386/i686/hp-timing.h42
-rw-r--r--sysdeps/i386/i686/init-arch.h19
-rw-r--r--sysdeps/i386/i686/memcmp.S408
-rw-r--r--sysdeps/i386/i686/memcpy.S98
-rw-r--r--sysdeps/i386/i686/memmove.S120
-rw-r--r--sysdeps/i386/i686/mempcpy.S65
-rw-r--r--sysdeps/i386/i686/memset.S100
-rw-r--r--sysdeps/i386/i686/memusage.h21
-rw-r--r--sysdeps/i386/i686/multiarch/Makefile44
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/bcopy.S59
-rw-r--r--sysdeps/i386/i686/multiarch/bzero-sse2-rep.S3
-rw-r--r--sysdeps/i386/i686/multiarch/bzero-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/bzero.S62
-rw-r--r--sysdeps/i386/i686/multiarch/ifunc-impl-list.c376
-rw-r--r--sysdeps/i386/i686/multiarch/locale-defines.sym11
-rw-r--r--sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S502
-rw-r--r--sysdeps/i386/i686/multiarch/memchr-sse2.S709
-rw-r--r--sysdeps/i386/i686/multiarch/memchr.S65
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp-sse4.S1225
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp-ssse3.S2157
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp.S62
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S681
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S1809
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy-ssse3.S3162
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy.S78
-rw-r--r--sysdeps/i386/i686/multiarch/memcpy_chk.S50
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/memmove.S89
-rw-r--r--sysdeps/i386/i686/multiarch/memmove_chk.S94
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy.S81
-rw-r--r--sysdeps/i386/i686/multiarch/mempcpy_chk.S50
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr-c.c7
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S417
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr-sse2.S724
-rw-r--r--sysdeps/i386/i686/multiarch/memrchr.S45
-rw-r--r--sysdeps/i386/i686/multiarch/memset-sse2-rep.S811
-rw-r--r--sysdeps/i386/i686/multiarch/memset-sse2.S860
-rw-r--r--sysdeps/i386/i686/multiarch/memset.S75
-rw-r--r--sysdeps/i386/i686/multiarch/memset_chk.S82
-rw-r--r--sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S3
-rw-r--r--sysdeps/i386/i686/multiarch/rawmemchr-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/rawmemchr.S65
-rw-r--r--sysdeps/i386/i686/multiarch/rtld-strnlen.c1
-rw-r--r--sysdeps/i386/i686/multiarch/s_fma-fma.c27
-rw-r--r--sysdeps/i386/i686/multiarch/s_fma.c34
-rw-r--r--sysdeps/i386/i686/multiarch/s_fmaf-fma.c27
-rw-r--r--sysdeps/i386/i686/multiarch/s_fmaf.c34
-rw-r--r--sysdeps/i386/i686/multiarch/sched_cpucount.c1
-rw-r--r--sysdeps/i386/i686/multiarch/stpcpy-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/stpcpy-ssse3.S3
-rw-r--r--sysdeps/i386/i686/multiarch/stpcpy.S9
-rw-r--r--sysdeps/i386/i686/multiarch/stpncpy-sse2.S4
-rw-r--r--sysdeps/i386/i686/multiarch/stpncpy-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/stpncpy.S8
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp-c.c12
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp.S39
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l-c.c13
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strcasecmp_l.S7
-rw-r--r--sysdeps/i386/i686/multiarch/strcat-sse2.S1245
-rw-r--r--sysdeps/i386/i686/multiarch/strcat-ssse3.S572
-rw-r--r--sysdeps/i386/i686/multiarch/strcat.S92
-rw-r--r--sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S158
-rw-r--r--sysdeps/i386/i686/multiarch/strchr-sse2.S348
-rw-r--r--sysdeps/i386/i686/multiarch/strchr.S57
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp-sse4.S804
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp-ssse3.S2810
-rw-r--r--sysdeps/i386/i686/multiarch/strcmp.S95
-rw-r--r--sysdeps/i386/i686/multiarch/strcpy-sse2.S2250
-rw-r--r--sysdeps/i386/i686/multiarch/strcpy-ssse3.S3901
-rw-r--r--sysdeps/i386/i686/multiarch/strcpy.S116
-rw-r--r--sysdeps/i386/i686/multiarch/strcspn-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strcspn.S75
-rw-r--r--sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S125
-rw-r--r--sysdeps/i386/i686/multiarch/strlen-sse2.S695
-rw-r--r--sysdeps/i386/i686/multiarch/strlen.S60
-rw-r--r--sysdeps/i386/i686/multiarch/strncase-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncase.S39
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l-c.c13
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l-sse4.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l-ssse3.S2
-rw-r--r--sysdeps/i386/i686/multiarch/strncase_l.S7
-rw-r--r--sysdeps/i386/i686/multiarch/strncat-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncat-sse2.S4
-rw-r--r--sysdeps/i386/i686/multiarch/strncat-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/strncat.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp-sse4.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp-ssse3.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncmp.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy-c.c8
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy-ssse3.S3
-rw-r--r--sysdeps/i386/i686/multiarch/strncpy.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strnlen-c.c10
-rw-r--r--sysdeps/i386/i686/multiarch/strnlen-sse2.S3
-rw-r--r--sysdeps/i386/i686/multiarch/strnlen.S37
-rw-r--r--sysdeps/i386/i686/multiarch/strpbrk-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strpbrk.S5
-rw-r--r--sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S282
-rw-r--r--sysdeps/i386/i686/multiarch/strrchr-sse2.S708
-rw-r--r--sysdeps/i386/i686/multiarch/strrchr.S57
-rw-r--r--sysdeps/i386/i686/multiarch/strspn-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strspn.S56
-rw-r--r--sysdeps/i386/i686/multiarch/test-multiarch.c1
-rw-r--r--sysdeps/i386/i686/multiarch/varshift.c1
-rw-r--r--sysdeps/i386/i686/multiarch/varshift.h1
-rw-r--r--sysdeps/i386/i686/multiarch/wcschr-c.c22
-rw-r--r--sysdeps/i386/i686/multiarch/wcschr-sse2.S219
-rw-r--r--sysdeps/i386/i686/multiarch/wcschr.S36
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp-c.c14
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp-sse2.S1018
-rw-r--r--sysdeps/i386/i686/multiarch/wcscmp.S39
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy-c.c5
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy-ssse3.S600
-rw-r--r--sysdeps/i386/i686/multiarch/wcscpy.S36
-rw-r--r--sysdeps/i386/i686/multiarch/wcslen-c.c9
-rw-r--r--sysdeps/i386/i686/multiarch/wcslen-sse2.S193
-rw-r--r--sysdeps/i386/i686/multiarch/wcslen.S37
-rw-r--r--sysdeps/i386/i686/multiarch/wcsrchr-c.c5
-rw-r--r--sysdeps/i386/i686/multiarch/wcsrchr-sse2.S354
-rw-r--r--sysdeps/i386/i686/multiarch/wcsrchr.S35
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp-c.c9
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp-sse4.S4
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S4
-rw-r--r--sysdeps/i386/i686/multiarch/wmemcmp.S40
-rw-r--r--sysdeps/i386/i686/nptl/tls.h35
-rw-r--r--sysdeps/i386/i686/pthread_spin_trylock.S20
-rw-r--r--sysdeps/i386/i686/stack-aliasing.h23
-rw-r--r--sysdeps/i386/i686/strcmp.S52
-rw-r--r--sysdeps/i386/i686/tst-stack-align.h44
167 files changed, 0 insertions, 38206 deletions
diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile
deleted file mode 100644
index 311042787b..0000000000
--- a/sysdeps/i386/i686/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# So that we can test __m128's alignment
-stack-align-test-flags += -msse
-
-CFLAGS-.o += -Wa,-mtune=i686
-CFLAGS-.os += -Wa,-mtune=i686
-CFLAGS-.op += -Wa,-mtune=i686
-CFLAGS-.oS += -Wa,-mtune=i686
-
-ASFLAGS-.o += -Wa,-mtune=i686
-ASFLAGS-.os += -Wa,-mtune=i686
-ASFLAGS-.op += -Wa,-mtune=i686
-ASFLAGS-.oS += -Wa,-mtune=i686
diff --git a/sysdeps/i386/i686/add_n.S b/sysdeps/i386/i686/add_n.S
deleted file mode 100644
index 4afa648ceb..0000000000
--- a/sysdeps/i386/i686/add_n.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Add two limb vectors of the same length > 0 and store sum in a third
- limb vector.
- Copyright (C) 1992-2017 Free Software Foundation, Inc.
- This file is part of the GNU MP Library.
-
- The GNU MP Library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 2.1 of the License, or (at your
- option) any later version.
-
- The GNU MP Library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
-
- You should have received a copy of the GNU Lesser General Public License
- along with the GNU MP Library; see the file COPYING.LIB. If not,
- see <http://www.gnu.org/licenses/>. */
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-#define PARMS 4+8 /* space for 2 saved regs */
-#define RES PARMS
-#define S1 RES+4
-#define S2 S1+4
-#define SIZE S2+4
-
- .text
-#ifdef PIC
-L(1): addl (%esp), %eax
- ret
-#endif
-ENTRY (__mpn_add_n)
-
- pushl %edi
- cfi_adjust_cfa_offset (4)
- pushl %esi
- cfi_adjust_cfa_offset (4)
-
- movl RES(%esp),%edi
- cfi_rel_offset (edi, 4)
- movl S1(%esp),%esi
- cfi_rel_offset (esi, 0)
- movl S2(%esp),%edx
- movl SIZE(%esp),%ecx
- movl %ecx,%eax
- shrl $3,%ecx /* compute count for unrolled loop */
- negl %eax
- andl $7,%eax /* get index where to start loop */
- jz L(oop) /* necessary special case for 0 */
- incl %ecx /* adjust loop count */
- shll $2,%eax /* adjustment for pointers... */
- subl %eax,%edi /* ... since they are offset ... */
- subl %eax,%esi /* ... by a constant when we ... */
- subl %eax,%edx /* ... enter the loop */
- shrl $2,%eax /* restore previous value */
-#ifdef PIC
-/* Calculate start address in loop for PIC. */
- leal (L(oop)-L(0)-3)(%eax,%eax,8),%eax
- call L(1)
-L(0):
-#else
-/* Calculate start address in loop for non-PIC. */
- leal (L(oop) - 3)(%eax,%eax,8),%eax
-#endif
- jmp *%eax /* jump into loop */
- ALIGN (3)
-L(oop): movl (%esi),%eax
- adcl (%edx),%eax
- movl %eax,(%edi)
- movl 4(%esi),%eax
- adcl 4(%edx),%eax
- movl %eax,4(%edi)
- movl 8(%esi),%eax
- adcl 8(%edx),%eax
- movl %eax,8(%edi)
- movl 12(%esi),%eax
- adcl 12(%edx),%eax
- movl %eax,12(%edi)
- movl 16(%esi),%eax
- adcl 16(%edx),%eax
- movl %eax,16(%edi)
- movl 20(%esi),%eax
- adcl 20(%edx),%eax
- movl %eax,20(%edi)
- movl 24(%esi),%eax
- adcl 24(%edx),%eax
- movl %eax,24(%edi)
- movl 28(%esi),%eax
- adcl 28(%edx),%eax
- movl %eax,28(%edi)
- leal 32(%edi),%edi
- leal 32(%esi),%esi
- leal 32(%edx),%edx
- decl %ecx
- jnz L(oop)
-
- sbbl %eax,%eax
- negl %eax
-
- popl %esi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (esi)
- popl %edi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (edi)
-
- ret
-END (__mpn_add_n)
diff --git a/sysdeps/i386/i686/bcopy.S b/sysdeps/i386/i686/bcopy.S
deleted file mode 100644
index 15ef9419a4..0000000000
--- a/sysdeps/i386/i686/bcopy.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_BCOPY
-#define memmove bcopy
-#include <sysdeps/i386/i686/memmove.S>
diff --git a/sysdeps/i386/i686/bzero.S b/sysdeps/i386/i686/bzero.S
deleted file mode 100644
index c7898f18e0..0000000000
--- a/sysdeps/i386/i686/bzero.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_BZERO
-#define memset __bzero
-#include <sysdeps/i386/i686/memset.S>
-weak_alias (__bzero, bzero)
diff --git a/sysdeps/i386/i686/dl-hash.h b/sysdeps/i386/i686/dl-hash.h
deleted file mode 100644
index ceda785b32..0000000000
--- a/sysdeps/i386/i686/dl-hash.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Compute hash value for given string according to ELF standard.
- Copyright (C) 1998-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _DL_HASH_H
-#define _DL_HASH_H 1
-
-
-/* This is the hashing function specified by the ELF ABI. It is highly
- optimized for the PII processors. Though it will run on i586 it
- would be much slower than the generic C implementation. So don't
- use it there. Returns the hash of NAME (0 for the empty string). */
-static unsigned int
-__attribute__ ((unused))
-_dl_elf_hash (const char *name)
-{
- unsigned int result;
- unsigned int temp0; /* Current byte; pinned to %ecx below so jecxz can test it. */
- unsigned int temp1; /* Scratch register for the top-nibble fold. */
-
- __asm__ __volatile__
- ("movzbl (%1),%2\n\t" /* Load the first byte of NAME. */
- "testl %2, %2\n\t"
- "jz 1f\n\t" /* Empty string: result keeps its initial 0. */
- "movl %2, %0\n\t"
- "movzbl 1(%1), %2\n\t"
- "jecxz 1f\n\t" /* %2 is %ecx: stop at the terminating NUL. */
- "shll $4, %0\n\t" /* hash = (hash << 4) + byte. */
- "addl %2, %0\n\t"
- "movzbl 2(%1), %2\n\t"
- "jecxz 1f\n\t"
- "shll $4, %0\n\t"
- "addl %2, %0\n\t"
- "movzbl 3(%1), %2\n\t"
- "jecxz 1f\n\t"
- "shll $4, %0\n\t"
- "addl %2, %0\n\t"
- "movzbl 4(%1), %2\n\t"
- "jecxz 1f\n\t"
- "shll $4, %0\n\t"
- "addl $5, %1\n\t" /* Bytes 0-4 have been read; advance NAME past them. */
- "addl %2, %0\n\t" /* After five bytes the hash still fits in 28 bits. */
- "movzbl (%1), %2\n\t"
- "jecxz 1f\n"
- "2:\t" /* From the 6th byte on the top nibble can be set, so fold it. */
- "shll $4, %0\n\t"
- "movl $0xf0000000, %3\n\t"
- "incl %1\n\t"
- "addl %2, %0\n\t"
- "andl %0, %3\n\t" /* %3 = hash & 0xf0000000. */
- "andl $0x0fffffff, %0\n\t" /* Clear those four bits in the hash. */
- "shrl $24, %3\n\t"
- "movzbl (%1), %2\n\t" /* Fetch the next byte for the loop test. */
- "xorl %3, %0\n\t" /* hash ^= (top nibble >> 24). */
- "testl %2, %2\n\t"
- "jnz 2b\n"
- "1:\t"
- : "=&r" (result), "=r" (name), "=&c" (temp0), "=&r" (temp1)
- : "0" (0), "1" ((const unsigned char *) name));
-
- return result;
-}
-
-#endif /* dl-hash.h */
diff --git a/sysdeps/i386/i686/ffs.c b/sysdeps/i386/i686/ffs.c
deleted file mode 100644
index cbe36ff873..0000000000
--- a/sysdeps/i386/i686/ffs.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* ffs -- find first set bit in a word, counted from least significant end.
- For Intel 80x86, x>=6.
- This file is part of the GNU C Library.
- Copyright (C) 1991-2017 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@cygnus.com>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define ffsl __something_else
-#include <string.h>
-
-#undef ffs
-
-#ifdef __GNUC__
-
-int
-__ffs (int x)
-{
- int cnt;
- int tmp;
-
- asm ("bsfl %2,%0\n" /* Store index of lowest set bit of X in %0. */
- "cmovel %1,%0\n" /* If number was zero, use -1 (held in %1) as result. */
- : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1));
-
- return cnt + 1; /* 1-based bit position; 0 when X is zero. */
-}
-weak_alias (__ffs, ffs)
-libc_hidden_def (__ffs)
-libc_hidden_builtin_def (ffs)
-#undef ffsl
-weak_alias (__ffs, ffsl)
-
-#else
-#include <string/ffs.c>
-#endif
diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S
deleted file mode 100644
index 73060b088c..0000000000
--- a/sysdeps/i386/i686/fpu/e_log.S
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- *
- * Adapted for i686 instructions.
- */
-
-#include <machine/asm.h>
-
-
- .text
-ENTRY(__ieee754_log)
- fldln2 // log(2)
- fldl 4(%esp) // x : log(2)
- fucomi %st
- jp 3f
- fyl2x // log(x)
- ret
-
-3: fstp %st(1)
- ret
-END (__ieee754_log)
-
-ENTRY(__log_finite)
- fldln2 // log(2)
- fldl 4(%esp) // x : log(2)
- fyl2x // log(x)
- ret
-END(__log_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S
deleted file mode 100644
index 6fd39d50d3..0000000000
--- a/sysdeps/i386/i686/fpu/e_logf.S
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
- *
- * Adapted for i686 instructions.
- */
-
-#include <machine/asm.h>
-
-
- .text
-ENTRY(__ieee754_logf)
- fldln2 // log(2)
- flds 4(%esp) // x : log(2)
- fucomi %st
- jp 3f
- fyl2x // log(x)
- ret
-
-3: fstp %st(1)
- ret
-END (__ieee754_logf)
-
-ENTRY(__logf_finite)
- fldln2 // log(2)
- flds 4(%esp) // x : log(2)
- fyl2x // log(x)
- ret
-END(__logf_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S
deleted file mode 100644
index 7e3bc8d817..0000000000
--- a/sysdeps/i386/i686/fpu/e_logl.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- *
- * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
- * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
- * Adapted for i686 instructions.
- */
-
-#include <machine/asm.h>
-
- .section .rodata.cst8,"aM",@progbits,8
-
- .p2align 3
- .type one,@object
-one: .double 1.0
- ASM_SIZE_DIRECTIVE(one)
- /* It is not important that this constant is precise. It is only
- a value which is known to be on the safe side for using the
- fyl2xp1 instruction. */
- .type limit,@object
-limit: .double 0.29
- ASM_SIZE_DIRECTIVE(limit)
-
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%edx)
-#else
-# define MO(op) op
-#endif
-
- .text
-ENTRY(__ieee754_logl)
- fldln2 // log(2)
- fldt 4(%esp) // x : log(2)
- fucomi %st
- jp 3f
-#ifdef PIC
- LOAD_PIC_REG (dx)
-#endif
- fld %st // x : x : log(2)
- movzwl 4+8(%esp), %eax
- cmpl $0xc000, %eax
- jae 5f // x <= -2, avoid overflow from -LDBL_MAX - 1.
- fsubl MO(one) // x-1 : x : log(2)
-5: fld %st // x-1 : x-1 : x : log(2)
- fabs // |x-1| : x-1 : x : log(2)
- fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
- fcomip %st(1) // |x-1| : x-1 : x : log(2)
- fstp %st(0) // x-1 : x : log(2)
- jc 2f
- fxam
- fnstsw
- andb $0x45, %ah
- cmpb $0x40, %ah
- jne 4f
- fabs // log(1) is +0 in all rounding modes.
-4: fstp %st(1) // x-1 : log(2)
- fyl2xp1 // log(x)
- ret
-
-2: fstp %st(0) // x : log(2)
- fyl2x // log(x)
- ret
-
-3: fstp %st(1)
- fadd %st(0)
- ret
-END (__ieee754_logl)
-
-ENTRY(__logl_finite)
- fldln2 // log(2)
- fldt 4(%esp) // x : log(2)
-#ifdef PIC
- LOAD_PIC_REG (dx)
-#endif
- fld %st // x : x : log(2)
- fsubl MO(one) // x-1 : x : log(2)
- fld %st // x-1 : x-1 : x : log(2)
- fabs // |x-1| : x-1 : x : log(2)
- fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
- fcomip %st(1) // |x-1| : x-1 : x : log(2)
- fstp %st(0) // x-1 : x : log(2)
- jc 2b
- fxam
- fnstsw
- andb $0x45, %ah
- cmpb $0x40, %ah
- jne 6f
- fabs // log(1) is +0 in all rounding modes.
-6: fstp %st(1) // x-1 : log(2)
- fyl2xp1 // log(x)
- ret
-END(__logl_finite)
diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile
deleted file mode 100644
index 7d9089232f..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-ifeq ($(subdir),math)
-libm-sysdep_routines += e_expf-sse2 e_expf-ia32 s_sinf-sse2 s_cosf-sse2 \
- s_sincosf-sse2
-endif
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S
deleted file mode 100644
index b486b4d1ca..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/e_expf-ia32.S
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define __ieee754_expf __ieee754_expf_ia32
-#define __expf_finite __expf_finite_ia32
-
-#include <sysdeps/i386/fpu/e_expf.S>
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
deleted file mode 100644
index e6bb6fa289..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
+++ /dev/null
@@ -1,325 +0,0 @@
-/* SSE2 version of __ieee754_expf and __expf_finite
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#include <sysdep.h>
-
-/* Short algorithm description:
- *
- * Let K = 64 (table size).
- * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
- * where
- * x = m*log(2)/K + y, y in [0.0..log(2)/K]
- * m = n*K + j, m,n,j - signed integer, j in [0..K-1]
- * values of 2^(j/K) are tabulated as T[j].
- *
- * P(y) is a minimax polynomial approximation of exp(y)-1
- * on small interval [0.0..log(2)/K].
- *
- * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as
- * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y
- *
- * Special cases:
- * __ieee754_expf_sse2(NaN) = NaN
- * __ieee754_expf_sse2(+INF) = +INF
- * __ieee754_expf_sse2(-INF) = 0
- * __ieee754_expf_sse2(x) = 1 for subnormals
- * for finite argument, only __ieee754_expf_sse2(0)=1 is exact
- * __ieee754_expf_sse2(x) overflows if x>700
- * __ieee754_expf_sse2(x) underflows if x<-700
- *
- * Note:
- * For |x|<700, __ieee754_expf_sse2 computes result in double precision,
- * with accuracy a bit more than needed for expf, and does not round it
- * to single precision.
- */
-
-
-#ifdef PIC
-# define MO1(symbol) L(symbol)##@GOTOFF(%edx)
-# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%edx,reg2,_scale)
-#else
-# define MO1(symbol) L(symbol)
-# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
-#endif
-
- .text
-ENTRY(__ieee754_expf_sse2)
- /* Input: single precision x on stack at address 4(%esp) */
-
-#ifdef PIC
- LOAD_PIC_REG(dx)
-#endif
-
- cvtss2sd 4(%esp), %xmm1 /* Convert x to double precision */
- mov 4(%esp), %ecx /* Copy x */
- movsd MO1(DP_KLN2), %xmm2 /* DP K/log(2) */
- movsd MO1(DP_P2), %xmm3 /* DP P2 */
- movl %ecx, %eax /* x */
- mulsd %xmm1, %xmm2 /* DP x*K/log(2) */
- andl $0x7fffffff, %ecx /* |x| */
- cmpl $0x442f0000, %ecx /* |x|<700 ? */
- movsd MO1(DP_P3), %xmm4 /* DP P3 */
- addsd MO1(DP_RS), %xmm2 /* DP x*K/log(2)+RS */
- jae L(special_paths)
-
- /* Here if |x|<700 */
- cmpl $0x31800000, %ecx /* |x|<2^(-28) ? */
- jb L(small_arg)
-
- /* Main path: here if 2^(-28)<=|x|<700 */
- cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */
- movd %xmm2, %eax /* bits of n*K+j with trash */
- subss MO1(SP_RS), %xmm2 /* SP t=round(x*K/log(2)) */
- movl %eax, %ecx /* n*K+j with trash */
- cvtss2sd %xmm2, %xmm2 /* DP t */
- andl $0x3f, %eax /* bits of j */
- mulsd MO1(DP_NLN2K), %xmm2 /* DP -t*log(2)/K */
- andl $0xffffffc0, %ecx /* bits of n */
-#ifdef __AVX__
- vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */
- vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */
-#else
- addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */
- movaps %xmm2, %xmm0 /* DP y */
- mulsd %xmm2, %xmm2 /* DP z=y*y */
-#endif
- mulsd %xmm2, %xmm4 /* DP P3*z */
- addl $0xffc0, %ecx /* bits of n + DP exponent bias */
- mulsd %xmm2, %xmm3 /* DP P2*z */
- shrl $2, %ecx /* High 2 bytes of DP 2^n */
- pxor %xmm1, %xmm1 /* clear %xmm1 */
- addsd MO1(DP_P1), %xmm4 /* DP P3*z+P1 */
- addsd MO1(DP_P0), %xmm3 /* DP P2*z+P0 */
- pinsrw $3, %ecx, %xmm1 /* DP 2^n */
- mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */
- mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */
- addsd %xmm4, %xmm0 /* DP P(y) */
- mulsd MO2(DP_T,%eax,8), %xmm0 /* DP P(y)*T[j] */
- addsd MO2(DP_T,%eax,8), %xmm0 /* DP T[j]*(P(y)+1) */
- mulsd %xmm1, %xmm0 /* DP result=2^n*(T[j]*(P(y)+1)) */
- cvtsd2ss %xmm0, %xmm1
-
- lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
- movss %xmm1, 0(%esp) /* Move result from sse... */
- flds 0(%esp) /* ...to FPU. */
- lea 4(%esp), %esp /* Return back 4 bytes of stack frame */
- ret
-
- .p2align 4
-L(small_arg):
- /* Here if 0<=|x|<2^(-28) */
- movss 4(%esp), %xmm0 /* load x */
- addss MO1(SP_ONE), %xmm0 /* 1.0 + x */
- /* Return 1.0 with inexact raised, except for x==0 */
- jmp L(epilogue)
-
- .p2align 4
-L(special_paths):
- /* Here if x is NaN, or Inf, or finite |x|>=700 */
- movss 4(%esp), %xmm0 /* load x */
-
- cmpl $0x7f800000, %ecx /* |x| is finite ? */
- jae L(arg_inf_or_nan)
-
- /* Here if finite |x|>=700 */
- testl $0x80000000, %eax /* sign of x nonzero ? */
- je L(res_overflow)
-
- /* Here if finite x<=-700 */
- movss MO1(SP_SMALL), %xmm0 /* load small value 2^(-100) */
- mulss %xmm0, %xmm0 /* Return underflowed result (zero or subnormal) */
- jmp L(epilogue)
-
- .p2align 4
-L(res_overflow):
- /* Here if finite x>=700 */
- movss MO1(SP_LARGE), %xmm0 /* load large value 2^100 */
- mulss %xmm0, %xmm0 /* Return overflowed result (Inf or max normal) */
- jmp L(epilogue)
-
- .p2align 4
-L(arg_inf_or_nan):
- /* Here if |x| is Inf or NAN */
- jne L(arg_nan) /* |x| is Inf ? */
-
- /* Here if |x| is Inf */
- shrl $31, %eax /* Get sign bit of x */
- movss MO2(SP_INF_0,%eax,4), %xmm0/* return zero or Inf, depending on sign of x */
- jmp L(epilogue)
-
- .p2align 4
-L(arg_nan):
- /* Here if |x| is NaN */
- addss %xmm0, %xmm0 /* Return x+x (raise invalid) */
-
- .p2align 4
-L(epilogue):
- lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
- movss %xmm0, 0(%esp) /* Move result from sse... */
- flds 0(%esp) /* ...to FPU. */
- lea 4(%esp), %esp /* Return back 4 bytes of stack frame */
- ret
-END(__ieee754_expf_sse2)
-
- .section .rodata, "a"
- .p2align 3
-L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */
- .long 0x00000000, 0x3ff00000
- .long 0x3e778061, 0x3ff02c9a
- .long 0xd3158574, 0x3ff059b0
- .long 0x18759bc8, 0x3ff08745
- .long 0x6cf9890f, 0x3ff0b558
- .long 0x32d3d1a2, 0x3ff0e3ec
- .long 0xd0125b51, 0x3ff11301
- .long 0xaea92de0, 0x3ff1429a
- .long 0x3c7d517b, 0x3ff172b8
- .long 0xeb6fcb75, 0x3ff1a35b
- .long 0x3168b9aa, 0x3ff1d487
- .long 0x88628cd6, 0x3ff2063b
- .long 0x6e756238, 0x3ff2387a
- .long 0x65e27cdd, 0x3ff26b45
- .long 0xf51fdee1, 0x3ff29e9d
- .long 0xa6e4030b, 0x3ff2d285
- .long 0x0a31b715, 0x3ff306fe
- .long 0xb26416ff, 0x3ff33c08
- .long 0x373aa9cb, 0x3ff371a7
- .long 0x34e59ff7, 0x3ff3a7db
- .long 0x4c123422, 0x3ff3dea6
- .long 0x21f72e2a, 0x3ff4160a
- .long 0x6061892d, 0x3ff44e08
- .long 0xb5c13cd0, 0x3ff486a2
- .long 0xd5362a27, 0x3ff4bfda
- .long 0x769d2ca7, 0x3ff4f9b2
- .long 0x569d4f82, 0x3ff5342b
- .long 0x36b527da, 0x3ff56f47
- .long 0xdd485429, 0x3ff5ab07
- .long 0x15ad2148, 0x3ff5e76f
- .long 0xb03a5585, 0x3ff6247e
- .long 0x82552225, 0x3ff66238
- .long 0x667f3bcd, 0x3ff6a09e
- .long 0x3c651a2f, 0x3ff6dfb2
- .long 0xe8ec5f74, 0x3ff71f75
- .long 0x564267c9, 0x3ff75feb
- .long 0x73eb0187, 0x3ff7a114
- .long 0x36cf4e62, 0x3ff7e2f3
- .long 0x994cce13, 0x3ff82589
- .long 0x9b4492ed, 0x3ff868d9
- .long 0x422aa0db, 0x3ff8ace5
- .long 0x99157736, 0x3ff8f1ae
- .long 0xb0cdc5e5, 0x3ff93737
- .long 0x9fde4e50, 0x3ff97d82
- .long 0x82a3f090, 0x3ff9c491
- .long 0x7b5de565, 0x3ffa0c66
- .long 0xb23e255d, 0x3ffa5503
- .long 0x5579fdbf, 0x3ffa9e6b
- .long 0x995ad3ad, 0x3ffae89f
- .long 0xb84f15fb, 0x3ffb33a2
- .long 0xf2fb5e47, 0x3ffb7f76
- .long 0x904bc1d2, 0x3ffbcc1e
- .long 0xdd85529c, 0x3ffc199b
- .long 0x2e57d14b, 0x3ffc67f1
- .long 0xdcef9069, 0x3ffcb720
- .long 0x4a07897c, 0x3ffd072d
- .long 0xdcfba487, 0x3ffd5818
- .long 0x03db3285, 0x3ffda9e6
- .long 0x337b9b5f, 0x3ffdfc97
- .long 0xe78b3ff6, 0x3ffe502e
- .long 0xa2a490da, 0x3ffea4af
- .long 0xee615a27, 0x3ffefa1b
- .long 0x5b6e4540, 0x3fff5076
- .long 0x819e90d8, 0x3fffa7c1
- .type L(DP_T), @object
- ASM_SIZE_DIRECTIVE(L(DP_T))
-
- .section .rodata.cst8,"aM",@progbits,8
- .p2align 3
-L(DP_KLN2): /* double precision K/log(2) */
- .long 0x652b82fe, 0x40571547
- .type L(DP_KLN2), @object
- ASM_SIZE_DIRECTIVE(L(DP_KLN2))
-
- .p2align 3
-L(DP_NLN2K): /* double precision -log(2)/K */
- .long 0xfefa39ef, 0xbf862e42
- .type L(DP_NLN2K), @object
- ASM_SIZE_DIRECTIVE(L(DP_NLN2K))
-
- .p2align 3
-L(DP_RS): /* double precision 2^23+2^22 */
- .long 0x00000000, 0x41680000
- .type L(DP_RS), @object
- ASM_SIZE_DIRECTIVE(L(DP_RS))
-
- .p2align 3
-L(DP_P3): /* double precision polynomial coefficient P3 */
- .long 0xeb78fa85, 0x3fa56420
- .type L(DP_P3), @object
- ASM_SIZE_DIRECTIVE(L(DP_P3))
-
- .p2align 3
-L(DP_P1): /* double precision polynomial coefficient P1 */
- .long 0x008d6118, 0x3fe00000
- .type L(DP_P1), @object
- ASM_SIZE_DIRECTIVE(L(DP_P1))
-
- .p2align 3
-L(DP_P2): /* double precision polynomial coefficient P2 */
- .long 0xda752d4f, 0x3fc55550
- .type L(DP_P2), @object
- ASM_SIZE_DIRECTIVE(L(DP_P2))
-
- .p2align 3
-L(DP_P0): /* double precision polynomial coefficient P0 */
- .long 0xffffe7c6, 0x3fefffff
- .type L(DP_P0), @object
- ASM_SIZE_DIRECTIVE(L(DP_P0))
-
- .p2align 2
-L(SP_INF_0):
- .long 0x7f800000 /* single precision Inf */
- .long 0 /* single precision zero */
- .type L(SP_INF_0), @object
- ASM_SIZE_DIRECTIVE(L(SP_INF_0))
-
- .section .rodata.cst4,"aM",@progbits,4
- .p2align 2
-L(SP_RS): /* single precision 2^23+2^22 */
- .long 0x4b400000
- .type L(SP_RS), @object
- ASM_SIZE_DIRECTIVE(L(SP_RS))
-
- .p2align 2
-L(SP_SMALL): /* single precision small value 2^(-100) */
- .long 0x0d800000
- .type L(SP_SMALL), @object
- ASM_SIZE_DIRECTIVE(L(SP_SMALL))
-
- .p2align 2
-L(SP_LARGE): /* single precision large value 2^100 */
- .long 0x71800000
- .type L(SP_LARGE), @object
- ASM_SIZE_DIRECTIVE(L(SP_LARGE))
-
- .p2align 2
-L(SP_ONE): /* single precision 1.0 */
- .long 0x3f800000
- .type L(SP_ONE), @object
- ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-strong_alias (__ieee754_expf_sse2, __expf_finite_sse2)
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_expf.c b/sysdeps/i386/i686/fpu/multiarch/e_expf.c
deleted file mode 100644
index 388cf98a39..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/e_expf.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of expf
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <init-arch.h>
-
-extern double __ieee754_expf_sse2 (double);
-extern double __ieee754_expf_ia32 (double);
-
-double __ieee754_expf (double);
-libm_ifunc (__ieee754_expf,
- HAS_CPU_FEATURE (SSE2)
- ? __ieee754_expf_sse2
- : __ieee754_expf_ia32);
-
-extern double __expf_finite_sse2 (double);
-extern double __expf_finite_ia32 (double);
-
-double __expf_finite (double);
-libm_ifunc (__expf_finite,
- HAS_CPU_FEATURE (SSE2)
- ? __expf_finite_sse2
- : __expf_finite_ia32);
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
deleted file mode 100644
index 04bc23b37b..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ /dev/null
@@ -1,2188 +0,0 @@
-# Begin of automatic generation
-
-# Maximal error of functions:
-Function: "acos":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "acos_downward":
-ildouble: 2
-ldouble: 2
-
-Function: "acos_towardzero":
-ildouble: 2
-ldouble: 2
-
-Function: "acos_upward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "acosh":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 2
-
-Function: "acosh_downward":
-double: 1
-idouble: 1
-ildouble: 6
-ldouble: 4
-
-Function: "acosh_towardzero":
-double: 1
-idouble: 1
-ildouble: 6
-ldouble: 4
-
-Function: "acosh_upward":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 3
-
-Function: "asin":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "asin_downward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "asin_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "asin_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "asinh":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "asinh_downward":
-double: 1
-float: 1
-idouble: 1
-ildouble: 5
-ldouble: 5
-
-Function: "asinh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "asinh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: "atan":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan2_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atan_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "atanh":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "atanh_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 4
-
-Function: "atanh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 3
-
-Function: "atanh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: "cabs":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cabs_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cabs_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cabs_upward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cacos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "cacos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cacos_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacos_downward":
-double: 5
-float: 3
-idouble: 5
-ifloat: 3
-ildouble: 6
-ldouble: 6
-
-Function: Real part of "cacos_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacos_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Real part of "cacos_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacos_upward":
-double: 7
-float: 7
-idouble: 7
-ifloat: 7
-ildouble: 7
-ldouble: 7
-
-Function: Real part of "cacosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cacosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cacosh_downward":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "cacosh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "cacosh_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "cacosh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cacosh_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "cacosh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "carg":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "carg_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "carg_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "carg_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "casin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "casin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "casin_downward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "casin_downward":
-double: 5
-float: 3
-idouble: 5
-ifloat: 3
-ildouble: 6
-ldouble: 6
-
-Function: Real part of "casin_towardzero":
-double: 3
-float: 1
-idouble: 3
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "casin_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Real part of "casin_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "casin_upward":
-double: 7
-float: 7
-idouble: 7
-ifloat: 7
-ildouble: 7
-ldouble: 7
-
-Function: Real part of "casinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "casinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "casinh_downward":
-double: 5
-float: 3
-idouble: 5
-ifloat: 3
-ildouble: 6
-ldouble: 6
-
-Function: Imaginary part of "casinh_downward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "casinh_towardzero":
-double: 4
-float: 3
-idouble: 4
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "casinh_towardzero":
-double: 3
-float: 1
-idouble: 3
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "casinh_upward":
-double: 7
-float: 7
-idouble: 7
-ifloat: 7
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "casinh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "catan":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catan_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "catan_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "catan_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catan_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "catanh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "catanh":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catanh_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "catanh_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catanh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "catanh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "catanh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "catanh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cbrt":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "cbrt_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "cbrt_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "cbrt_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ccos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "ccos":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "ccos_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccos_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccos_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccos_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccos_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "ccos_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "ccosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "ccosh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "ccosh_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccosh_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccosh_towardzero":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ccosh_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ccosh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "ccosh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cexp":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "cexp":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cexp_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "cexp_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "cexp_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "cexp_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "cexp_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cexp_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "clog":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "clog10":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "clog10":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "clog10_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 8
-ldouble: 8
-
-Function: Imaginary part of "clog10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog10_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 8
-ldouble: 8
-
-Function: Imaginary part of "clog10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog10_upward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "clog10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "clog_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "clog_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "clog_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "clog_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "clog_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "clog_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "cos":
-ildouble: 1
-ldouble: 1
-
-Function: "cos_downward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "cos_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cos_upward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cosh":
-double: 1
-float: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cosh_downward":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 3
-
-Function: "cosh_towardzero":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "cosh_upward":
-double: 4
-float: 2
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 3
-
-Function: Real part of "cpow":
-double: 2
-float: 5
-idouble: 2
-ifloat: 5
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "cpow":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "cpow_downward":
-double: 5
-float: 8
-idouble: 5
-ifloat: 8
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "cpow_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "cpow_towardzero":
-double: 5
-float: 8
-idouble: 5
-ifloat: 8
-ildouble: 7
-ldouble: 7
-
-Function: Imaginary part of "cpow_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "cpow_upward":
-double: 4
-float: 1
-idouble: 4
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "cpow_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "csin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "csin":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-
-Function: Real part of "csin_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csin_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csin_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csin_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csin_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csin_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "csinh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "csinh_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csinh_downward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csinh_towardzero":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csinh_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csinh_upward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "csinh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "csqrt":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "csqrt":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "csqrt_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "csqrt_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "csqrt_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "csqrt_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "csqrt_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "csqrt_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctan":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Imaginary part of "ctan":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Real part of "ctan_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "ctan_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctan_towardzero":
-double: 3
-float: 1
-idouble: 3
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: Imaginary part of "ctan_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctan_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ctan_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ctanh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: Imaginary part of "ctanh":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: Real part of "ctanh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "ctanh_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Real part of "ctanh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: Imaginary part of "ctanh_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Real part of "ctanh_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "ctanh_upward":
-double: 3
-float: 2
-idouble: 3
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "erf":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erf_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erf_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erf_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "erfc":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "erfc_downward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "erfc_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: "erfc_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "exp":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp10":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp2":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp2_downward":
-ildouble: 1
-ldouble: 1
-
-Function: "exp2_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp2_upward":
-ildouble: 1
-ldouble: 1
-
-Function: "exp_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "exp_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "exp_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "expm1":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "expm1_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "expm1_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "expm1_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "gamma":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "gamma_downward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "gamma_towardzero":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "gamma_upward":
-double: 3
-float: 4
-idouble: 3
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "hypot":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "hypot_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "hypot_towardzero":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "hypot_upward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "j0":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "j0_downward":
-double: 1
-float: 3
-idouble: 1
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "j0_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 5
-ldouble: 5
-
-Function: "j0_upward":
-double: 1
-float: 3
-idouble: 1
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "j1":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "j1_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 4
-ldouble: 4
-
-Function: "j1_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "j1_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 3
-ldouble: 3
-
-Function: "jn":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "jn_downward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "jn_towardzero":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "jn_upward":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "lgamma":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "lgamma_downward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "lgamma_towardzero":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 7
-ldouble: 7
-
-Function: "lgamma_upward":
-double: 3
-float: 4
-idouble: 3
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "log":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log10":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log1p":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log1p_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "log1p_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 4
-
-Function: "log1p_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "log2":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log2_downward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log2_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log2_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "log_downward":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "log_upward":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "pow":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "pow10":
-double: 1
-idouble: 1
-ildouble: 1
-ldouble: 1
-
-Function: "pow10_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow_downward":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "pow_towardzero":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "pow_upward":
-double: 1
-idouble: 1
-ildouble: 4
-ldouble: 4
-
-Function: "sin":
-ildouble: 1
-ldouble: 1
-
-Function: "sin_downward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sin_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "sin_upward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sincos":
-ildouble: 1
-ldouble: 1
-
-Function: "sincos_downward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sincos_towardzero":
-double: 1
-idouble: 1
-ildouble: 2
-ldouble: 2
-
-Function: "sincos_upward":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "sinh":
-double: 1
-ildouble: 2
-ldouble: 2
-
-Function: "sinh_downward":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 5
-
-Function: "sinh_towardzero":
-double: 2
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 4
-
-Function: "sinh_upward":
-double: 4
-float: 2
-idouble: 1
-ifloat: 1
-ildouble: 4
-ldouble: 5
-
-Function: "tan":
-float: 1
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "tan_downward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "tan_towardzero":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "tan_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: "tanh":
-double: 1
-idouble: 1
-ildouble: 3
-ldouble: 3
-
-Function: "tanh_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 7
-ldouble: 4
-
-Function: "tanh_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: "tanh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 5
-ldouble: 4
-
-Function: "tgamma":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "tgamma_downward":
-double: 3
-float: 4
-idouble: 3
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "tgamma_towardzero":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "tgamma_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
-
-Function: "y0":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 1
-ldouble: 1
-
-Function: "y0_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "y0_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "y0_upward":
-double: 1
-float: 2
-idouble: 1
-ifloat: 2
-ildouble: 3
-ldouble: 3
-
-Function: "y1":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
-
-Function: "y1_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 7
-ldouble: 7
-
-Function: "y1_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "y1_upward":
-double: 1
-float: 3
-idouble: 1
-ifloat: 3
-ildouble: 7
-ldouble: 7
-
-Function: "yn":
-double: 2
-float: 3
-idouble: 2
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-Function: "yn_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 5
-ldouble: 5
-
-Function: "yn_towardzero":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 5
-ldouble: 5
-
-Function: "yn_upward":
-double: 3
-float: 3
-idouble: 3
-ifloat: 3
-ildouble: 4
-ldouble: 4
-
-# end of automatic generation
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name
deleted file mode 100644
index 193dd704b3..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps-name
+++ /dev/null
@@ -1 +0,0 @@
-i686
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
deleted file mode 100644
index f37850d0b3..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
+++ /dev/null
@@ -1,553 +0,0 @@
-/* Optimized with sse2 version of cosf
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- * 1) if |x| == 0: return 1.0-|x|.
- * 2) if |x| < 2^-27: return 1.0-|x|.
- * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1.
- * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
- * 5) if |x| < 9*Pi/4:
- * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
- * t=|x|-j*Pi/4.
- * 5.2) Reconstruction:
- * s = (-1.0)^((n>>2)&1)
- * if(n&2 != 0) {
- * using cos(t) polynomial for |t|<Pi/4, result is
- * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- * } else {
- * using sin(t) polynomial for |t|<Pi/4, result is
- * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- * }
- * 6) if |x| < 2^23, large args:
- * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- * t=|x|-j*Pi/4.
- * 6.2) Reconstruction same as (5.2).
- * 7) if |x| >= 2^23, very large args:
- * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- * t=|x|-j*Pi/4.
- * 7.2) Reconstruction same as (5.2).
- * 8) if x is Inf, return x-x, and set errno=EDOM.
- * 9) if x is NaN, return x-x.
- *
- * Special cases:
- * cos(+-0) = 1 not raising inexact,
- * cos(subnormal) raises inexact,
- * cos(min_normalized) raises inexact,
- * cos(normalized) raises inexact,
- * cos(Inf) = NaN, raises invalid, sets errno to EDOM,
- * cos(NaN) = NaN.
- */
-
-#ifdef PIC /* PIC build: constants are addressed as @GOTOFF relative to %ebx */
-# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
-# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
-# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
-# define PUSH(REG) pushl REG; CFI_PUSH(REG)
-# define POP(REG) popl REG; CFI_POP(REG)
-# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) /* save %ebx, then load the PIC base into it */
-# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) /* CFI_PUSH after ret: presumably re-describes the saved %ebx for the code that follows */
-# define ARG_X 8(%esp) /* 4 bytes further than non-PIC because ENTRANCE pushed %ebx */
-#else /* non-PIC build: constants are addressed absolutely, no register is saved */
-# define MO1(symbol) L(symbol)
-# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
-# define ENTRANCE
-# define RETURN ret
-# define ARG_X 4(%esp)
-#endif
-
- .text
-ENTRY(__cosf_sse2)
- /* Input: single precision x on stack at address ARG_X. Result is left in st(0). */
-
- ENTRANCE
- movl ARG_X, %eax /* Bits of x */
- cvtss2sd ARG_X, %xmm0 /* DP x */
- andl $0x7fffffff, %eax /* |x| */
-
- cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
- jb L(arg_less_pio4)
-
- /* Here if |x|>=Pi/4 */
- movd %eax, %xmm3 /* SP |x| */
- andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
- movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
-
- cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
- jae L(large_args)
-
- /* Here if Pi/4<=|x|<9*Pi/4 */
- mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
- cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
- addl $1, %eax /* k+1 */
- movl $0x0e, %edx
- andl %eax, %edx /* j = (k+1)&0x0e */
- addl $2, %eax /* n = k+3 */
- subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
-
-L(reconstruction):
- /* Input: %eax=n, %xmm0=t */
- testl $2, %eax /* n&2 != 0? */
- jz L(sin_poly)
-
-/*L(cos_poly):*/
- /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = (-1.0)^((n>>2)&1) (the sign of x is not used: only |x| is processed)
- * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
- */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd MO1(DP_C4), %xmm4 /* C4 */
- mulsd %xmm0, %xmm4 /* z*C4 */
- movsd MO1(DP_C3), %xmm3 /* C3 */
- mulsd %xmm0, %xmm3 /* z*C3 */
- addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
- mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
- lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
- addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
- mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
- addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
- mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
-
- addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd MO1(DP_ONES), %xmm3
-
- mulsd MO2(DP_ONES,%eax,8), %xmm3 /* DP result, multiplied by s = DP_ONES[(n>>2)&1] */
- movsd %xmm3, 0(%esp) /* Move result from sse... */
- fldl 0(%esp) /* ...to FPU. */
- /* Return back 8 bytes of stack frame */
- lea 8(%esp), %esp
- RETURN
-
- .p2align 4
-L(sin_poly):
- /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = (-1.0)^((n>>2)&1) (the sign of x is not used: only |x| is processed)
- * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
- */
-
- movaps %xmm0, %xmm4 /* t */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd MO1(DP_S4), %xmm2 /* S4 */
- mulsd %xmm0, %xmm2 /* z*S4 */
- movsd MO1(DP_S3), %xmm3 /* S3 */
- mulsd %xmm0, %xmm3 /* z*S3 */
- lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
- addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
- mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
- addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
- mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
- addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
- mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
- /* t*s, where s = (-1.0)^((n>>2)&1) */
- mulsd MO2(DP_ONES,%eax,8), %xmm4
- addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- mulsd %xmm4, %xmm3
- /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm4, %xmm3
- movsd %xmm3, 0(%esp) /* Move result from sse... */
- fldl 0(%esp) /* ...to FPU. */
- /* Return back 8 bytes of stack frame */
- lea 8(%esp), %esp
- RETURN
-
- .p2align 4
-L(large_args):
- /* Here if |x|>=9*Pi/4 */
- cmpl $0x7f800000, %eax /* x is Inf or NaN? */
- jae L(arg_inf_or_nan)
-
- /* Here if finite |x|>=9*Pi/4 */
- cmpl $0x4b000000, %eax /* |x|<2^23? */
- jae L(very_large_args)
-
- /* Here if 9*Pi/4<=|x|<2^23 */
- movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
- mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
- cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
- addl $1, %eax /* k+1 */
- movl %eax, %edx
- andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
- cvtsi2sdl %edx, %xmm4 /* DP j */
- movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
- mulsd %xmm4, %xmm2 /* -j*PIO4HI */
- movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
- addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
- addl $2, %eax /* n = k+3 */
- mulsd %xmm3, %xmm4 /* -j*PIO4LO */
- addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
- jmp L(reconstruction)
-
- .p2align 4
-L(very_large_args):
- /* Here if finite |x|>=2^23 */
-
- /* bitpos = (ix>>23) - BIAS_32 + 59; */
- shrl $23, %eax /* eb = biased exponent of x */
- /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
- subl $68, %eax
- movl $28, %ecx /* %cl=28 */
- movl %eax, %edx /* bitpos copy */
-
- /* j = bitpos/28; */
- div %cl /* j in register %al=%ax/%cl */
- movapd %xmm0, %xmm3 /* |x| */
- /* clear unneeded remainder from %ah */
- andl $0xff, %eax
-
- imull $28, %eax, %ecx /* j*28 */
- movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
- movapd %xmm0, %xmm5 /* |x| */
- mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
- movapd %xmm0, %xmm1 /* |x| */
- mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
- mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
- addl $19, %ecx /* j*28+19 */
- mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
- cmpl %ecx, %edx /* bitpos>=j*28+19? */
- jl L(very_large_skip1)
-
- /* Here if bitpos>=j*28+19 */
- andpd %xmm3, %xmm4 /* HI(tmp3) */
- subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
- movsd MO1(DP_2POW52), %xmm6
- movapd %xmm5, %xmm2 /* tmp2 copy */
- addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
- movl $1, %edx
- addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
- movsd 8+MO1(DP_2POW52), %xmm4
- movd %xmm6, %eax /* k = I64_LO(tmp6); */
- addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
- comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
- jbe L(very_large_skip2)
-
- /* Here if tmp4 > tmp5 */
- subl $1, %eax /* k-- */
- addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
-L(very_large_skip2):
-
- andl %eax, %edx /* k&1 */
- subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
- addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
- addsd %xmm2, %xmm3 /* t += tmp2 */
- addsd %xmm3, %xmm0 /* t += tmp0 */
- addl $3, %eax /* n=k+3 */
- addsd %xmm1, %xmm0 /* t += tmp1 */
- mulsd MO1(DP_PIO4), %xmm0 /* t *= PIO4 */
-
- jmp L(reconstruction) /* end of very_large_args path */
-
- .p2align 4
-L(arg_less_pio4):
- /* Here if |x|<Pi/4 */
- cmpl $0x3d000000, %eax /* |x|<2^-5? */
- jl L(arg_less_2pn5)
-
- /* Here if 2^-5<=|x|<Pi/4 */
- mulsd %xmm0, %xmm0 /* y=x^2 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=x^4 */
- movsd MO1(DP_C4), %xmm3 /* C4 */
- mulsd %xmm0, %xmm3 /* z*C4 */
- movsd MO1(DP_C3), %xmm5 /* C3 */
- mulsd %xmm0, %xmm5 /* z*C3 */
- addsd MO1(DP_C2), %xmm3 /* C2+z*C4 */
- mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */
- addsd MO1(DP_C1), %xmm5 /* C1+z*C3 */
- mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */
- addsd MO1(DP_C0), %xmm3 /* C0+z*(C2+z*C4) */
- mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */
- addsd %xmm5, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd MO1(DP_ONES), %xmm3
- cvtsd2ss %xmm3, %xmm3 /* SP result */
-
-L(epilogue):
- lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
- movss %xmm3, 0(%esp) /* Move SP result in %xmm3 from sse... */
- flds 0(%esp) /* ...to FPU. */
- /* Return back 4 bytes of stack frame */
- lea 4(%esp), %esp
- RETURN
-
- .p2align 4
-L(arg_less_2pn5):
- /* Here if |x|<2^-5 */
- cmpl $0x32000000, %eax /* |x|<2^-27? */
- jl L(arg_less_2pn27)
-
- /* Here if 2^-27<=|x|<2^-5 */
- mulsd %xmm0, %xmm0 /* DP x^2 */
- movsd MO1(DP_COS2_1), %xmm3 /* DP DP_COS2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */
- addsd MO1(DP_COS2_0), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
- /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
- addsd MO1(DP_ONES), %xmm3
- cvtsd2ss %xmm3, %xmm3 /* SP result */
- jmp L(epilogue)
-
- .p2align 4
-L(arg_less_2pn27):
- /* Here if |x|<2^-27 */
- movss ARG_X, %xmm0 /* x */
- andps MO1(SP_ABS_MASK),%xmm0 /* |x| */
- movss MO1(SP_ONE), %xmm3 /* 1.0 */
- subss %xmm0, %xmm3 /* result is 1.0-|x| */
- jmp L(epilogue)
-
- .p2align 4
-L(arg_inf_or_nan):
- /* Here if |x| is Inf or NAN */
- jne L(skip_errno_setting) /* x is NaN (flags still set by cmpl in large_args) */
-
- /* Here if x is Inf. Set errno to EDOM. */
- call JUMPTARGET(__errno_location)
- movl $EDOM, (%eax)
-
- .p2align 4
-L(skip_errno_setting):
- /* Here if |x| is Inf or NAN. Continued. */
- movss ARG_X, %xmm3 /* load x */
- subss %xmm3, %xmm3 /* Result is NaN */
- jmp L(epilogue)
-END(__cosf_sse2)
-
- .section .rodata, "a"
- .p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
- .long 0x00000000,0x00000000
- .long 0x54442d18,0x3fe921fb
- .long 0x54442d18,0x3ff921fb
- .long 0x7f3321d2,0x4002d97c
- .long 0x54442d18,0x400921fb
- .long 0x2955385e,0x400f6a7a
- .long 0x7f3321d2,0x4012d97c
- .long 0xe9bba775,0x4015fdbb
- .long 0x54442d18,0x401921fb
- .long 0xbeccb2bb,0x401c463a
- .long 0x2955385e,0x401f6a7a
- .type L(PIO4J), @object
- ASM_SIZE_DIRECTIVE(L(PIO4J))
-
- .p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
- .long 0x00000000,0x00000000
- .long 0x6c000000,0x3ff45f30
- .long 0x2a000000,0x3e3c9c88
- .long 0xa8000000,0x3c54fe13
- .long 0xd0000000,0x3aaf47d4
- .long 0x6c000000,0x38fbb81b
- .long 0xe0000000,0x3714acc9
- .long 0x7c000000,0x3560e410
- .long 0x56000000,0x33bca2c7
- .long 0xac000000,0x31fbd778
- .long 0xe0000000,0x300b7246
- .long 0xe8000000,0x2e5d2126
- .long 0x48000000,0x2c970032
- .long 0xe8000000,0x2ad77504
- .long 0xe0000000,0x290921cf
- .long 0xb0000000,0x274deb1c
- .long 0xe0000000,0x25829a73
- .long 0xbe000000,0x23fd1046
- .long 0x10000000,0x2224baed
- .long 0x8e000000,0x20709d33
- .long 0x80000000,0x1e535a2f
- .long 0x64000000,0x1cef904e
- .long 0x30000000,0x1b0d6398
- .long 0x24000000,0x1964ce7d
- .long 0x16000000,0x17b908bf
- .type L(_FPI), @object
- ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
- for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5. */
- .p2align 3
-L(DP_COS2_0):
- .long 0xff5cc6fd,0xbfdfffff
- .type L(DP_COS2_0), @object
- ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
-
- .p2align 3
-L(DP_COS2_1):
- .long 0xb178dac5,0x3fa55514
- .type L(DP_COS2_1), @object
- ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
-
- .p2align 3
-L(DP_ZERONE):
- .long 0x00000000,0x00000000 /* 0.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ZERONE),@object
- ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
- .p2align 3
-L(DP_ONES):
- .long 0x00000000,0x3ff00000 /* +1.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ONES), @object
- ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
- for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_S3):
- .long 0x64e6b5b4,0x3ec71d72
- .type L(DP_S3), @object
- ASM_SIZE_DIRECTIVE(L(DP_S3))
-
- .p2align 3
-L(DP_S1):
- .long 0x10c2688b,0x3f811111
- .type L(DP_S1), @object
- ASM_SIZE_DIRECTIVE(L(DP_S1))
-
- .p2align 3
-L(DP_S4):
- .long 0x1674b58a,0xbe5a947e
- .type L(DP_S4), @object
- ASM_SIZE_DIRECTIVE(L(DP_S4))
-
- .p2align 3
-L(DP_S2):
- .long 0x8b4bd1f9,0xbf2a019f
- .type L(DP_S2), @object
- ASM_SIZE_DIRECTIVE(L(DP_S2))
-
- .p2align 3
-L(DP_S0):
- .long 0x55551cd9,0xbfc55555
- .type L(DP_S0), @object
- ASM_SIZE_DIRECTIVE(L(DP_S0))
-
-/* Coefficients of polynomial
- for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_C3):
- .long 0x9ac43cc0,0x3efa00eb
- .type L(DP_C3), @object
- ASM_SIZE_DIRECTIVE(L(DP_C3))
-
- .p2align 3
-L(DP_C1):
- .long 0x545c50c7,0x3fa55555
- .type L(DP_C1), @object
- ASM_SIZE_DIRECTIVE(L(DP_C1))
-
- .p2align 3
-L(DP_C4):
- .long 0xdd8844d7,0xbe923c97
- .type L(DP_C4), @object
- ASM_SIZE_DIRECTIVE(L(DP_C4))
-
- .p2align 3
-L(DP_C2):
- .long 0x348b6874,0xbf56c16b
- .type L(DP_C2), @object
- ASM_SIZE_DIRECTIVE(L(DP_C2))
-
- .p2align 3
-L(DP_C0):
- .long 0xfffe98ae,0xbfdfffff
- .type L(DP_C0), @object
- ASM_SIZE_DIRECTIVE(L(DP_C0))
-
- .p2align 3
-L(DP_PIO4):
- .long 0x54442d18,0x3fe921fb /* Pi/4 */
- .type L(DP_PIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
- .p2align 3
-L(DP_2POW52):
- .long 0x00000000,0x43300000 /* +2^52 */
- .long 0x00000000,0xc3300000 /* -2^52 */
- .type L(DP_2POW52), @object
- ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
- .p2align 3
-L(DP_INVPIO4):
- .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
- .type L(DP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
- .p2align 3
-L(DP_PIO4HI):
- .long 0x54000000,0xbfe921fb /* High part of Pi/4, negated (sign bit set) */
- .type L(DP_PIO4HI), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
- .p2align 3
-L(DP_PIO4LO):
- .long 0x11A62633,0xbe010b46 /* Low part of Pi/4, negated (sign bit set) */
- .type L(DP_PIO4LO), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
- .p2align 2
-L(SP_INVPIO4):
- .long 0x3fa2f983 /* 4/Pi */
- .type L(SP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
- .p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
- .long 0xffffffff,0x7fffffff
- .long 0xffffffff,0x7fffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
- .p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
- .long 0x00000000,0xffffffff
- .type L(DP_HI_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
- .p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
- .long 0x7fffffff,0x7fffffff
- .long 0x7fffffff,0x7fffffff
- .type L(SP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
- .p2align 2
-L(SP_ONE):
- .long 0x3f800000 /* 1.0 */
- .type L(SP_ONE), @object
- ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-weak_alias (__cosf, cosf)
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c b/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
deleted file mode 100644
index af588de9dc..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Multiple versions of cosf
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <init-arch.h>
-
-extern float __cosf_sse2 (float); /* SSE2 variant */
-extern float __cosf_ia32 (float); /* presumably produced by the include below via COSF -- confirm */
-float __cosf (float);
-
-libm_ifunc (__cosf, HAS_CPU_FEATURE (SSE2) ? __cosf_sse2 : __cosf_ia32); /* SSE2 variant if the CPU feature is reported, else ia32 */
-weak_alias (__cosf, cosf);
-
-#define COSF __cosf_ia32 /* must be defined before the generic source is included */
-#include <sysdeps/ieee754/flt-32/s_cosf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S
deleted file mode 100644
index f31a925522..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf-sse2.S
+++ /dev/null
@@ -1,586 +0,0 @@
-/* Optimized with sse2 version of sincosf
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- * 1) if |x|==0: sin(x)=x,
- * cos(x)=1.
- * 2) if |x|<2^-27: sin(x)=x-x*DP_SMALL, raising underflow only when needed,
- * cos(x)=1-|x|.
- * 3) if |x|<2^-5 : sin(x)=x+x*x^2*DP_SIN2_0+x^5*DP_SIN2_1,
- * cos(x)=1+1*x^2*DP_COS2_0+x^4*DP_COS2_1
- * 4) if |x|< Pi/4: sin(x)=x+x*x^2*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))),
- * cos(x)=1+1*x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
- * 5) if |x| < 9*Pi/4:
- * 5.1) Range reduction:
- * k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, t=|x|-j*Pi/4.
- * 5.2) Reconstruction:
- * sign_sin = sign(x) * (-1.0)^(( n >>2)&1)
- * sign_cos = (-1.0)^(((n+2)>>2)&1)
- * poly_sin = ((((S4*t^2 + S3)*t^2 + S2)*t^2 + S1)*t^2 + S0)*t^2*t+t
- * poly_cos = ((((C4*t^2 + C3)*t^2 + C2)*t^2 + C1)*t^2 + C0)*t^2*s+s
- * if(n&2 != 0) {
- * using cos(t) and sin(t) polynomials for |t|<Pi/4, results are
- * cos(x) = poly_sin * sign_cos
- * sin(x) = poly_cos * sign_sin
- * } else {
- * sin(x) = poly_sin * sign_sin
- * cos(x) = poly_cos * sign_cos
- * }
- * 6) if |x| < 2^23, large args:
- * 6.1) Range reduction:
- * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4
- * 6.2) Reconstruction same as (5.2).
- * 7) if |x| >= 2^23, very large args:
- * 7.1) Range reduction:
- * k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, t=|x|-j*Pi/4.
- * 7.2) Reconstruction same as (5.2).
- * 8) if x is Inf, return x-x, and set errno=EDOM.
- * 9) if x is NaN, return x-x.
- *
- * Special cases:
- * sin/cos(+-0) = +-0/1 not raising inexact/underflow,
- * sin/cos(subnormal) raises inexact/underflow,
- * sin/cos(min_normalized) raises inexact/underflow,
- * sin/cos(normalized) raises inexact,
- * sin/cos(Inf) = NaN, raises invalid, sets errno to EDOM,
- * sin/cos(NaN) = NaN.
- */
-
-#ifdef PIC /* PIC build: constants are addressed as @GOTOFF relative to %ebx */
-# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
-# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
-# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
-# define PUSH(REG) pushl REG; CFI_PUSH(REG)
-# define POP(REG) popl REG; CFI_POP(REG)
-# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) /* save %ebx, then load the PIC base into it */
-# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) /* CFI_PUSH after ret: presumably re-describes the saved %ebx for the code that follows */
-# define ARG_X 8(%esp) /* all argument offsets are 4 bytes further than non-PIC because ENTRANCE pushed %ebx */
-# define ARG_SIN_PTR 12(%esp)
-# define ARG_COS_PTR 16(%esp)
-#else /* non-PIC build: constants are addressed absolutely, no register is saved */
-# define MO1(symbol) L(symbol)
-# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
-# define ENTRANCE
-# define RETURN ret
-# define ARG_X 4(%esp)
-# define ARG_SIN_PTR 8(%esp)
-# define ARG_COS_PTR 12(%esp)
-#endif
-
- .text
-ENTRY(__sincosf_sse2)
- /* Input: single precision x on stack at address ARG_X */
- /* pointer to sin result on stack at address ARG_SIN_PTR */
- /* pointer to cos result on stack at address ARG_COS_PTR */
-
- ENTRANCE
- movl ARG_X, %eax /* Bits of x */
- cvtss2sd ARG_X, %xmm0 /* DP x */
- andl $0x7fffffff, %eax /* |x| */
-
- cmpl $0x3f490fdb, %eax /* |x|<Pi/4 ? */
- jb L(arg_less_pio4)
-
- /* Here if |x|>=Pi/4 */
- movd %eax, %xmm3 /* SP |x| */
- andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
- movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
-
- cmpl $0x40e231d6, %eax /* |x|<9*Pi/4 ? */
- jae L(large_args)
-
- /* Here if Pi/4<=|x|<9*Pi/4 */
- mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
- movl ARG_X, %ecx /* Load x */
- cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
- shrl $29, %ecx /* (sign of x) << 2 */
- addl $1, %eax /* n = k+1 */
- movl $0x0e, %edx
- andl %eax, %edx /* j = (k+1)&0x0e */
- subsd MO2(PIO4J,%edx,8), %xmm0/* t = |x| - j * Pi/4 */
-
-L(reconstruction):
- /* Input: %eax=n, %xmm0=t, %ecx=(sign of x)<<2 */
-
- movaps %xmm0, %xmm4 /* t */
- movhpd MO1(DP_ONES), %xmm4 /* 1|t */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- movl $2, %edx
- unpcklpd %xmm0, %xmm0 /* y|y */
- addl %eax, %edx /* n+2 */
- movaps %xmm0, %xmm1 /* y|y */
- mulpd %xmm0, %xmm0 /* z=t^4|z=t^4 */
-
- movaps MO1(DP_SC4), %xmm2 /* S4 */
- mulpd %xmm0, %xmm2 /* z*S4 */
- movaps MO1(DP_SC3), %xmm3 /* S3 */
- mulpd %xmm0, %xmm3 /* z*S3 */
- xorl %eax, %ecx /* bit 2 = sign_x ^ ((n>>2)&1) */
- addpd MO1(DP_SC2), %xmm2 /* S2+z*S4 */
- mulpd %xmm0, %xmm2 /* z*(S2+z*S4) */
- shrl $2, %edx /* (n+2)>>2 */
- addpd MO1(DP_SC1), %xmm3 /* S1+z*S3 */
- mulpd %xmm0, %xmm3 /* z*(S1+z*S3) */
- shrl $2, %ecx /* bit 0 = sign_x ^ ((n>>2)&1) */
- addpd MO1(DP_SC0), %xmm2 /* S0+z*(S2+z*S4) */
- andl $1, %edx /* sign_cos = ((n+2)>>2)&1 */
- mulpd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
- andl $1, %ecx /* sign_sin = sign_x ^ ((n>>2)&1) */
- addpd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- mulpd %xmm4, %xmm3 /*t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
- testl $2, %eax /* n&2 != 0 ? */
- addpd %xmm4, %xmm3 /*t+t*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
- jnz L(sin_result_sin_poly)
-
-/*L(sin_result_cos_poly):*/
- /*
- * Here if n&2 == 0 (fall-through); low lane is poly_sin, high lane is poly_cos:
- * sin(x) = poly_sin * sign_sin
- * cos(x) = poly_cos * sign_cos
- */
- movsd MO2(DP_ONES,%ecx,8), %xmm4/* 0|sign_sin */
- movhpd MO2(DP_ONES,%edx,8), %xmm4/* sign_cos|sign_sin */
- mulpd %xmm4, %xmm3 /* result_cos|result_sin */
- movl ARG_SIN_PTR, %eax
- cvtpd2ps %xmm3, %xmm0 /* SP results */
- movl ARG_COS_PTR, %ecx
- movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
- shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
- movss %xmm0, (%ecx) /* store cos(x) */
- RETURN
-
- .p2align 4
-L(sin_result_sin_poly):
- /*
- * Here if n&2 != 0; low lane is poly_sin, high lane is poly_cos:
- * cos(x) = poly_sin * sign_cos
- * sin(x) = poly_cos * sign_sin
- */
- movsd MO2(DP_ONES,%edx,8), %xmm4/* 0|sign_cos */
- movhpd MO2(DP_ONES,%ecx,8), %xmm4/* sign_sin|sign_cos */
- mulpd %xmm4, %xmm3 /* result_sin|result_cos */
- movl ARG_SIN_PTR, %eax
- cvtpd2ps %xmm3, %xmm0 /* SP results */
- movl ARG_COS_PTR, %ecx
- movss %xmm0, (%ecx) /* store cos(x) from xmm0[0] */
- shufps $1, %xmm0, %xmm0 /* move sin(x) to xmm0[0] */
- movss %xmm0, (%eax) /* store sin(x) */
- RETURN
-
- .p2align 4
-L(large_args):
- /* Here if |x|>=9*Pi/4 */
- cmpl $0x7f800000, %eax /* x is Inf or NaN ? */
- jae L(arg_inf_or_nan)
-
- /* Here if finite |x|>=9*Pi/4 */
- cmpl $0x4b000000, %eax /* |x|<2^23 ? */
- jae L(very_large_args)
-
- /* Here if 9*Pi/4<=|x|<2^23 */
- movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
- mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
- cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
- addl $1, %eax /* n = k+1 */
- movl %eax, %edx
- andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
- cvtsi2sdl %edx, %xmm4 /* DP j */
- movl ARG_X, %ecx /* Load x */
- movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
- shrl $29, %ecx /* (sign of x) << 2 */
- mulsd %xmm4, %xmm2 /* -j*PIO4HI */
- movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
- addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
- mulsd %xmm3, %xmm4 /* -j*PIO4LO */
- addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
- jmp L(reconstruction)
-
- .p2align 4
-L(very_large_args):
- /* Here if finite |x|>=2^23 */
-
- /* bitpos = (ix>>23) - BIAS_32 + 59; */
- shrl $23, %eax /* eb = biased exponent of x */
- subl $68, %eax /* bitpos=eb-0x7f+59, where 0x7f */
- /*is exponent bias */
- movl $28, %ecx /* %cl=28 */
- movl %eax, %edx /* bitpos copy */
-
- /* j = bitpos/28; */
- div %cl /* j in register %al=%ax/%cl */
- movapd %xmm0, %xmm3 /* |x| */
- andl $0xff, %eax /* clear unneeded remainder from %ah*/
-
- imull $28, %eax, %ecx /* j*28 */
- movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
- movapd %xmm0, %xmm5 /* |x| */
- mulsd -2*8+MO2(_FPI,%eax,8), %xmm3/* tmp3 = FPI[j-2]*|x| */
- movapd %xmm0, %xmm1 /* |x| */
- mulsd -1*8+MO2(_FPI,%eax,8), %xmm5/* tmp2 = FPI[j-1]*|x| */
- mulsd 0*8+MO2(_FPI,%eax,8), %xmm0/* tmp0 = FPI[j]*|x| */
- addl $19, %ecx /* j*28+19 */
- mulsd 1*8+MO2(_FPI,%eax,8), %xmm1/* tmp1 = FPI[j+1]*|x| */
- cmpl %ecx, %edx /* bitpos>=j*28+19 ? */
- jl L(very_large_skip1)
-
- /* Here if bitpos>=j*28+19 */
- andpd %xmm3, %xmm4 /* HI(tmp3) */
- subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
- movsd MO1(DP_2POW52), %xmm6
- movapd %xmm5, %xmm2 /* tmp2 copy */
- addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
- movl $1, %edx
- addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
- movsd 8+MO1(DP_2POW52), %xmm4
- movd %xmm6, %eax /* k = I64_LO(tmp6); */
- addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
- movl ARG_X, %ecx /* Load x */
- comisd %xmm5, %xmm4 /* tmp4 > tmp5 ? */
- jbe L(very_large_skip2)
-
- /* Here if tmp4 > tmp5 */
- subl $1, %eax /* k-- */
- addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
-L(very_large_skip2):
-
- andl %eax, %edx /* k&1 */
- subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
- addsd MO2(DP_ZERONE,%edx,8), %xmm3/* t = DP_ZERONE[k&1] + tmp3 */
- addsd %xmm2, %xmm3 /* t += tmp2 */
- shrl $29, %ecx /* (sign of x) << 2 */
- addsd %xmm3, %xmm0 /* t += tmp0 */
- addl $1, %eax /* n=k+1 */
- addsd %xmm1, %xmm0 /* t += tmp1 */
- mulsd MO1(DP_PIO4), %xmm0 /* t *= PIO4 */
-
- jmp L(reconstruction) /* end of very_large_args path */
-
- .p2align 4
-L(arg_less_pio4):
- /* Here if |x|<Pi/4 */
- cmpl $0x3d000000, %eax /* |x|<2^-5 ? */
- jl L(arg_less_2pn5)
-
- /* Here if 2^-5<=|x|<Pi/4 */
- movaps %xmm0, %xmm3 /* DP x */
- movhpd MO1(DP_ONES), %xmm3 /* DP 1|x */
- mulsd %xmm0, %xmm0 /* DP y=x^2 */
- unpcklpd %xmm0, %xmm0 /* DP y|y */
- movaps %xmm0, %xmm1 /* y|y */
- mulpd %xmm0, %xmm0 /* z=x^4|z=x^4 */
-
- movapd MO1(DP_SC4), %xmm4 /* S4 */
- mulpd %xmm0, %xmm4 /* z*S4 */
- movapd MO1(DP_SC3), %xmm5 /* S3 */
- mulpd %xmm0, %xmm5 /* z*S3 */
- addpd MO1(DP_SC2), %xmm4 /* S2+z*S4 */
- mulpd %xmm0, %xmm4 /* z*(S2+z*S4) */
- addpd MO1(DP_SC1), %xmm5 /* S1+z*S3 */
- mulpd %xmm0, %xmm5 /* z*(S1+z*S3) */
- addpd MO1(DP_SC0), %xmm4 /* S0+z*(S2+z*S4) */
- mulpd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
- mulpd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
- mulpd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
- addpd %xmm5, %xmm4 /*x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))*/
- movl ARG_SIN_PTR, %eax
- addpd %xmm4, %xmm3 /*x+x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4))*/
- movl ARG_COS_PTR, %ecx
- cvtpd2ps %xmm3, %xmm0 /* SP results */
- movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
- shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
- movss %xmm0, (%ecx) /* store cos(x) */
- RETURN
-
- .p2align 4
-L(arg_less_2pn5):
- /* Here if |x|<2^-5 */
- cmpl $0x32000000, %eax /* |x|<2^-27 ? */
- jl L(arg_less_2pn27)
-
- /* Here if 2^-27<=|x|<2^-5 */
- movaps %xmm0, %xmm1 /* DP x */
- movhpd MO1(DP_ONES), %xmm1 /* DP 1|x */
- mulsd %xmm0, %xmm0 /* DP x^2 */
- unpcklpd %xmm0, %xmm0 /* DP x^2|x^2 */
-
- movaps MO1(DP_SINCOS2_1), %xmm3/* DP DP_SIN2_1 */
- mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
- addpd MO1(DP_SINCOS2_0), %xmm3/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
- mulpd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
- mulpd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
- addpd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
- movl ARG_SIN_PTR, %eax
- cvtpd2ps %xmm3, %xmm0 /* SP results */
- movl ARG_COS_PTR, %ecx
- movss %xmm0, (%eax) /* store sin(x) from xmm0[0] */
- shufps $1, %xmm0, %xmm0 /* move cos(x) to xmm0[0] */
- movss %xmm0, (%ecx) /* store cos(x) */
- RETURN
-
- .p2align 4
-L(arg_less_2pn27):
- movss ARG_X, %xmm7 /* SP x */
- cmpl $0, %eax /* x=0 ? */
- je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */
- /* Here if |x|<2^-27 */
- /*
- * Special cases here:
- * sin(subnormal) raises inexact/underflow
- * sin(min_normalized) raises inexact/underflow
- * sin(normalized) raises inexact
- * cos(here)=1-|x| (raising inexact)
- */
- movaps %xmm0, %xmm3 /* DP x */
- mulsd MO1(DP_SMALL), %xmm0 /* DP x*DP_SMALL */
- subsd %xmm0, %xmm3 /* DP sin result is x-x*DP_SMALL */
- andps MO1(SP_ABS_MASK), %xmm7 /* SP |x| */
- cvtsd2ss %xmm3, %xmm0 /* sin(x) */
- movl ARG_SIN_PTR, %eax
- movss MO1(SP_ONE), %xmm1 /* SP 1.0 */
- movss %xmm0, (%eax) /* sin(x) store */
- movl ARG_COS_PTR, %ecx
- subss %xmm7, %xmm1 /* cos(x) */
- movss %xmm1, (%ecx) /* cos(x) store */
- RETURN
-
- .p2align 4
-L(arg_zero):
- movss MO1(SP_ONE), %xmm0 /* 1.0 */
- movl ARG_SIN_PTR, %eax
- movl ARG_COS_PTR, %ecx
- movss %xmm7, (%eax) /* sin(+-0)==x */
- movss %xmm0, (%ecx) /* cos(+-0)==1 */
- RETURN
-
- .p2align 4
-L(arg_inf_or_nan):
- movss ARG_X, %xmm7 /* SP x */
- /* Here if |x| is Inf or NAN */
- jne L(skip_errno_setting) /* x is NaN (flags still set by cmpl in large_args) */
-
- /* Here if x is Inf. Set errno to EDOM. */
- call JUMPTARGET(__errno_location)
- movl $EDOM, (%eax)
-
- .p2align 4
-L(skip_errno_setting):
- /* Here if |x| is Inf or NAN. Continued. */
- subss %xmm7, %xmm7 /* x-x, result is NaN */
- movl ARG_SIN_PTR, %eax
- movl ARG_COS_PTR, %ecx
- movss %xmm7, (%eax)
- movss %xmm7, (%ecx)
- RETURN
-END(__sincosf_sse2)
-
- .section .rodata, "a"
- .p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
- .long 0x00000000,0x00000000
- .long 0x54442d18,0x3fe921fb
- .long 0x54442d18,0x3ff921fb
- .long 0x7f3321d2,0x4002d97c
- .long 0x54442d18,0x400921fb
- .long 0x2955385e,0x400f6a7a
- .long 0x7f3321d2,0x4012d97c
- .long 0xe9bba775,0x4015fdbb
- .long 0x54442d18,0x401921fb
- .long 0xbeccb2bb,0x401c463a
- .long 0x2955385e,0x401f6a7a
- .type L(PIO4J), @object
- ASM_SIZE_DIRECTIVE(L(PIO4J))
-
- .p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
- .long 0x00000000,0x00000000
- .long 0x6c000000,0x3ff45f30
- .long 0x2a000000,0x3e3c9c88
- .long 0xa8000000,0x3c54fe13
- .long 0xd0000000,0x3aaf47d4
- .long 0x6c000000,0x38fbb81b
- .long 0xe0000000,0x3714acc9
- .long 0x7c000000,0x3560e410
- .long 0x56000000,0x33bca2c7
- .long 0xac000000,0x31fbd778
- .long 0xe0000000,0x300b7246
- .long 0xe8000000,0x2e5d2126
- .long 0x48000000,0x2c970032
- .long 0xe8000000,0x2ad77504
- .long 0xe0000000,0x290921cf
- .long 0xb0000000,0x274deb1c
- .long 0xe0000000,0x25829a73
- .long 0xbe000000,0x23fd1046
- .long 0x10000000,0x2224baed
- .long 0x8e000000,0x20709d33
- .long 0x80000000,0x1e535a2f
- .long 0x64000000,0x1cef904e
- .long 0x30000000,0x1b0d6398
- .long 0x24000000,0x1964ce7d
- .long 0x16000000,0x17b908bf
- .type L(_FPI), @object
- ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomials for */
-/* sin(x)~=x+x*x^2*(DP_SIN2_0+x^2*DP_SIN2_1) in low DP part, */
-/* cos(x)~=1+1*x^2*(DP_COS2_0+x^2*DP_COS2_1) in high DP part, */
-/* for |x|<2^-5. */
- .p2align 4
-L(DP_SINCOS2_0):
- .long 0x5543d49d,0xbfc55555
- .long 0xff5cc6fd,0xbfdfffff
- .type L(DP_SINCOS2_0), @object
- ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_0))
-
- .p2align 4
-L(DP_SINCOS2_1):
- .long 0x75cec8c5,0x3f8110f4
- .long 0xb178dac5,0x3fa55514
- .type L(DP_SINCOS2_1), @object
- ASM_SIZE_DIRECTIVE(L(DP_SINCOS2_1))
-
- .p2align 3
-L(DP_ZERONE):
- .long 0x00000000,0x00000000 /* 0.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ZERONE), @object
- ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
- .p2align 3
-L(DP_ONES):
- .long 0x00000000,0x3ff00000 /* +1.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ONES), @object
- ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomials for */
-/* sin(t)~=t+t*t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))) in low DP part, */
-/* cos(t)~=1+1*t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))) in high DP part, */
-/* for |t|<Pi/4. */
- .p2align 4
-L(DP_SC4):
- .long 0x1674b58a,0xbe5a947e
- .long 0xdd8844d7,0xbe923c97
- .type L(DP_SC4), @object
- ASM_SIZE_DIRECTIVE(L(DP_SC4))
-
- .p2align 4
-L(DP_SC3):
- .long 0x64e6b5b4,0x3ec71d72
- .long 0x9ac43cc0,0x3efa00eb
- .type L(DP_SC3), @object
- ASM_SIZE_DIRECTIVE(L(DP_SC3))
-
- .p2align 4
-L(DP_SC2):
- .long 0x8b4bd1f9,0xbf2a019f
- .long 0x348b6874,0xbf56c16b
- .type L(DP_SC2), @object
- ASM_SIZE_DIRECTIVE(L(DP_SC2))
-
- .p2align 4
-L(DP_SC1):
- .long 0x10c2688b,0x3f811111
- .long 0x545c50c7,0x3fa55555
- .type L(DP_SC1), @object
- ASM_SIZE_DIRECTIVE(L(DP_SC1))
-
- .p2align 4
-L(DP_SC0):
- .long 0x55551cd9,0xbfc55555
- .long 0xfffe98ae,0xbfdfffff
- .type L(DP_SC0), @object
- ASM_SIZE_DIRECTIVE(L(DP_SC0))
-
- .p2align 3
-L(DP_SMALL):
- .long 0x00000000,0x3cd00000 /* 2^(-50) */
- .type L(DP_SMALL), @object
- ASM_SIZE_DIRECTIVE(L(DP_SMALL))
-
- .p2align 3
-L(DP_PIO4):
- .long 0x54442d18,0x3fe921fb /* Pi/4 */
- .type L(DP_PIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
- .p2align 3
-L(DP_2POW52):
- .long 0x00000000,0x43300000 /* +2^52 */
- .long 0x00000000,0xc3300000 /* -2^52 */
- .type L(DP_2POW52), @object
- ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
- .p2align 3
-L(DP_INVPIO4):
- .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
- .type L(DP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
- .p2align 3
-L(DP_PIO4HI):
- .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
- .type L(DP_PIO4HI), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
- .p2align 3
-L(DP_PIO4LO):
- .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
- .type L(DP_PIO4LO), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
- .p2align 2
-L(SP_INVPIO4):
- .long 0x3fa2f983 /* 4/Pi */
- .type L(SP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
- .p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
- .long 0xffffffff,0x7fffffff
- .long 0xffffffff,0x7fffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
- .p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
- .long 0x00000000,0xffffffff
- .type L(DP_HI_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
- .p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
- .long 0x7fffffff,0x7fffffff
- .long 0x7fffffff,0x7fffffff
- .type L(SP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
- .p2align 2
-L(SP_ONE):
- .long 0x3f800000 /* 1.0 */
- .type L(SP_ONE), @object
- ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-weak_alias(__sincosf, sincosf)
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c b/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
deleted file mode 100644
index 9428f9b4ea..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sincosf.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Multiple versions of sincosf
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <init-arch.h>
-
-extern void __sincosf_sse2 (float, float *, float *);
-extern void __sincosf_ia32 (float, float *, float *);
-void __sincosf (float, float *, float *);
-
-libm_ifunc (__sincosf,
- HAS_CPU_FEATURE (SSE2) ? __sincosf_sse2 : __sincosf_ia32);
-weak_alias (__sincosf, sincosf);
-
-#define SINCOSF __sincosf_ia32
-#include <sysdeps/ieee754/flt-32/s_sincosf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
deleted file mode 100644
index ee96018061..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
+++ /dev/null
@@ -1,566 +0,0 @@
-/* Optimized with sse2 version of sinf
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- * 1) if |x| == 0: return x.
- * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed.
- * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
- * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
- * 5) if |x| < 9*Pi/4:
- * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
- * t=|x|-j*Pi/4.
- * 5.2) Reconstruction:
- * s = sign(x) * (-1.0)^((n>>2)&1)
- * if(n&2 != 0) {
- * using cos(t) polynomial for |t|<Pi/4, result is
- * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- * } else {
- * using sin(t) polynomial for |t|<Pi/4, result is
- * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- * }
- * 6) if |x| < 2^23, large args:
- * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- * t=|x|-j*Pi/4.
- * 6.2) Reconstruction same as (5.2).
- * 7) if |x| >= 2^23, very large args:
- * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- * t=|x|-j*Pi/4.
- * 7.2) Reconstruction same as (5.2).
- * 8) if x is Inf, return x-x, and set errno=EDOM.
- * 9) if x is NaN, return x-x.
- *
- * Special cases:
- * sin(+-0) = +-0 not raising inexact/underflow,
- * sin(subnormal) raises inexact/underflow,
- * sin(min_normalized) raises inexact/underflow,
- * sin(normalized) raises inexact,
- * sin(Inf) = NaN, raises invalid, sets errno to EDOM,
- * sin(NaN) = NaN.
- */
-
-#ifdef PIC
-# define MO1(symbol) L(symbol)##@GOTOFF(%ebx)
-# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
-# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG)
-# define PUSH(REG) pushl REG; CFI_PUSH(REG)
-# define POP(REG) popl REG; CFI_POP(REG)
-# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx)
-# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx)
-# define ARG_X 8(%esp)
-#else
-# define MO1(symbol) L(symbol)
-# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale)
-# define ENTRANCE
-# define RETURN ret
-# define ARG_X 4(%esp)
-#endif
-
- .text
-ENTRY(__sinf_sse2)
- /* Input: single precision x on stack at address ARG_X */
-
- ENTRANCE
- movl ARG_X, %eax /* Bits of x */
- cvtss2sd ARG_X, %xmm0 /* DP x */
- andl $0x7fffffff, %eax /* |x| */
-
- cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */
- jb L(arg_less_pio4)
-
- /* Here if |x|>=Pi/4 */
- movd %eax, %xmm3 /* SP |x| */
- andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */
- movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */
-
- cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */
- jae L(large_args)
-
- /* Here if Pi/4<=|x|<9*Pi/4 */
- mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */
- movl ARG_X, %ecx /* Load x */
- cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */
- shrl $31, %ecx /* sign of x */
- addl $1, %eax /* k+1 */
- movl $0x0e, %edx
- andl %eax, %edx /* j = (k+1)&0x0e */
- subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
-
-L(reconstruction):
- /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
- testl $2, %eax /* n&2 != 0? */
- jz L(sin_poly)
-
-/*L(cos_poly):*/
- /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = sign(x) * (-1.0)^((n>>2)&1)
- * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
- */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd MO1(DP_C4), %xmm4 /* C4 */
- mulsd %xmm0, %xmm4 /* z*C4 */
- xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */
- movsd MO1(DP_C3), %xmm3 /* C3 */
- mulsd %xmm0, %xmm3 /* z*C3 */
- addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */
- mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */
- lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
- addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */
- mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */
- addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */
- mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */
-
- addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
- addsd MO1(DP_ONES), %xmm3
-
- mulsd MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */
- movsd %xmm3, 0(%esp) /* Move result from sse... */
- fldl 0(%esp) /* ...to FPU. */
- /* Return back 8 bytes of stack frame */
- lea 8(%esp), %esp
- RETURN
-
- .p2align 4
-L(sin_poly):
- /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
- * y = t*t; z = y*y;
- * s = sign(x) * (-1.0)^((n>>2)&1)
- * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
- */
-
- movaps %xmm0, %xmm4 /* t */
- shrl $2, %eax /* n>>2 */
- mulsd %xmm0, %xmm0 /* y=t^2 */
- andl $1, %eax /* (n>>2)&1 */
- movaps %xmm0, %xmm1 /* y */
- xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */
- mulsd %xmm0, %xmm0 /* z=t^4 */
-
- movsd MO1(DP_S4), %xmm2 /* S4 */
- mulsd %xmm0, %xmm2 /* z*S4 */
- movsd MO1(DP_S3), %xmm3 /* S3 */
- mulsd %xmm0, %xmm3 /* z*S3 */
- lea -8(%esp), %esp /* Borrow 8 bytes of stack frame */
- addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */
- mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */
- addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */
- mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */
- addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */
- mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */
- /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
- mulsd MO2(DP_ONES,%ecx,8), %xmm4
- addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- mulsd %xmm4, %xmm3
- /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4))))) */
- addsd %xmm4, %xmm3
- movsd %xmm3, 0(%esp) /* Move result from sse... */
- fldl 0(%esp) /* ...to FPU. */
- /* Return back 8 bytes of stack frame */
- lea 8(%esp), %esp
- RETURN
-
- .p2align 4
-L(large_args):
- /* Here if |x|>=9*Pi/4 */
- cmpl $0x7f800000, %eax /* x is Inf or NaN? */
- jae L(arg_inf_or_nan)
-
- /* Here if finite |x|>=9*Pi/4 */
- cmpl $0x4b000000, %eax /* |x|<2^23? */
- jae L(very_large_args)
-
- /* Here if 9*Pi/4<=|x|<2^23 */
- movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */
- mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */
- cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */
- addl $1, %eax /* k+1 */
- movl %eax, %edx
- andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */
- cvtsi2sdl %edx, %xmm4 /* DP j */
- movl ARG_X, %ecx /* Load x */
- movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */
- shrl $31, %ecx /* sign bit of x */
- mulsd %xmm4, %xmm2 /* -j*PIO4HI */
- movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */
- addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */
- mulsd %xmm3, %xmm4 /* j*PIO4LO */
- addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */
- jmp L(reconstruction)
-
- .p2align 4
-L(very_large_args):
- /* Here if finite |x|>=2^23 */
-
- /* bitpos = (ix>>23) - BIAS_32 + 59; */
- shrl $23, %eax /* eb = biased exponent of x */
- /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
- subl $68, %eax
- movl $28, %ecx /* %cl=28 */
- movl %eax, %edx /* bitpos copy */
-
- /* j = bitpos/28; */
- div %cl /* j in register %al=%ax/%cl */
- movapd %xmm0, %xmm3 /* |x| */
- /* clear unneeded remainder from %ah */
- andl $0xff, %eax
-
- imull $28, %eax, %ecx /* j*28 */
- movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */
- movapd %xmm0, %xmm5 /* |x| */
- mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */
- movapd %xmm0, %xmm1 /* |x| */
- mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */
- mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */
- addl $19, %ecx /* j*28+19 */
- mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */
- cmpl %ecx, %edx /* bitpos>=j*28+19? */
- jl L(very_large_skip1)
-
- /* Here if bitpos>=j*28+19 */
- andpd %xmm3, %xmm4 /* HI(tmp3) */
- subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
- movsd MO1(DP_2POW52), %xmm6
- movapd %xmm5, %xmm2 /* tmp2 copy */
- addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */
- movl $1, %edx
- addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */
- movsd 8+MO1(DP_2POW52), %xmm4
- movd %xmm6, %eax /* k = I64_LO(tmp6); */
- addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */
- movl ARG_X, %ecx /* Load x */
- comisd %xmm5, %xmm4 /* tmp4 > tmp5? */
- jbe L(very_large_skip2)
-
- /* Here if tmp4 > tmp5 */
- subl $1, %eax /* k-- */
- addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */
-L(very_large_skip2):
-
- andl %eax, %edx /* k&1 */
- subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */
- addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */
- addsd %xmm2, %xmm3 /* t += tmp2 */
- shrl $31, %ecx /* sign of x */
- addsd %xmm3, %xmm0 /* t += tmp0 */
- addl $1, %eax /* n=k+1 */
- addsd %xmm1, %xmm0 /* t += tmp1 */
- mulsd MO1(DP_PIO4), %xmm0 /* t *= PIO4 */
-
- jmp L(reconstruction) /* end of very_large_args path */
-
- .p2align 4
-L(arg_less_pio4):
- /* Here if |x|<Pi/4 */
- cmpl $0x3d000000, %eax /* |x|<2^-5? */
- jl L(arg_less_2pn5)
-
- /* Here if 2^-5<=|x|<Pi/4 */
- movaps %xmm0, %xmm3 /* x */
- mulsd %xmm0, %xmm0 /* y=x^2 */
- movaps %xmm0, %xmm1 /* y */
- mulsd %xmm0, %xmm0 /* z=x^4 */
- movsd MO1(DP_S4), %xmm4 /* S4 */
- mulsd %xmm0, %xmm4 /* z*S4 */
- movsd MO1(DP_S3), %xmm5 /* S3 */
- mulsd %xmm0, %xmm5 /* z*S3 */
- addsd MO1(DP_S2), %xmm4 /* S2+z*S4 */
- mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */
- addsd MO1(DP_S1), %xmm5 /* S1+z*S3 */
- mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */
- addsd MO1(DP_S0), %xmm4 /* S0+z*(S2+z*S4) */
- mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */
- mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */
- mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */
- /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm5, %xmm4
- /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
- addsd %xmm4, %xmm3
- cvtsd2ss %xmm3, %xmm3 /* SP result */
-
-L(epilogue):
- lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */
- movss %xmm3, 0(%esp) /* Move result from sse... */
- flds 0(%esp) /* ...to FPU. */
- /* Return back 4 bytes of stack frame */
- lea 4(%esp), %esp
- RETURN
-
- .p2align 4
-L(arg_less_2pn5):
- /* Here if |x|<2^-5 */
- cmpl $0x32000000, %eax /* |x|<2^-27? */
- jl L(arg_less_2pn27)
-
- /* Here if 2^-27<=|x|<2^-5 */
- movaps %xmm0, %xmm1 /* DP x */
- mulsd %xmm0, %xmm0 /* DP x^2 */
- movsd MO1(DP_SIN2_1), %xmm3 /* DP DP_SIN2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */
- addsd MO1(DP_SIN2_0), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
- mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
- mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
- addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
- cvtsd2ss %xmm3, %xmm3 /* SP result */
- jmp L(epilogue)
-
- .p2align 4
-L(arg_less_2pn27):
- movss ARG_X, %xmm3 /* SP x */
- cmpl $0, %eax /* x=0? */
- je L(epilogue) /* in case x=0 return sin(+-0)==+-0 */
- /* Here if |x|<2^-27 */
- /*
- * Special cases here:
- * sin(subnormal) raises inexact/underflow
- * sin(min_normalized) raises inexact/underflow
- * sin(normalized) raises inexact
- */
- movaps %xmm0, %xmm3 /* Copy of DP x */
- mulsd MO1(DP_SMALL), %xmm0 /* x*DP_SMALL */
- subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */
- cvtsd2ss %xmm3, %xmm3 /* Result converted to SP */
- jmp L(epilogue)
-
- .p2align 4
-L(arg_inf_or_nan):
- /* Here if |x| is Inf or NAN */
- jne L(skip_errno_setting) /* in case of x is NaN */
-
- /* Here if x is Inf. Set errno to EDOM. */
- call JUMPTARGET(__errno_location)
- movl $EDOM, (%eax)
-
- .p2align 4
-L(skip_errno_setting):
- /* Here if |x| is Inf or NAN. Continued. */
- movss ARG_X, %xmm3 /* load x */
- subss %xmm3, %xmm3 /* Result is NaN */
- jmp L(epilogue)
-END(__sinf_sse2)
-
- .section .rodata, "a"
- .p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
- .long 0x00000000,0x00000000
- .long 0x54442d18,0x3fe921fb
- .long 0x54442d18,0x3ff921fb
- .long 0x7f3321d2,0x4002d97c
- .long 0x54442d18,0x400921fb
- .long 0x2955385e,0x400f6a7a
- .long 0x7f3321d2,0x4012d97c
- .long 0xe9bba775,0x4015fdbb
- .long 0x54442d18,0x401921fb
- .long 0xbeccb2bb,0x401c463a
- .long 0x2955385e,0x401f6a7a
- .type L(PIO4J), @object
- ASM_SIZE_DIRECTIVE(L(PIO4J))
-
- .p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
- .long 0x00000000,0x00000000
- .long 0x6c000000,0x3ff45f30
- .long 0x2a000000,0x3e3c9c88
- .long 0xa8000000,0x3c54fe13
- .long 0xd0000000,0x3aaf47d4
- .long 0x6c000000,0x38fbb81b
- .long 0xe0000000,0x3714acc9
- .long 0x7c000000,0x3560e410
- .long 0x56000000,0x33bca2c7
- .long 0xac000000,0x31fbd778
- .long 0xe0000000,0x300b7246
- .long 0xe8000000,0x2e5d2126
- .long 0x48000000,0x2c970032
- .long 0xe8000000,0x2ad77504
- .long 0xe0000000,0x290921cf
- .long 0xb0000000,0x274deb1c
- .long 0xe0000000,0x25829a73
- .long 0xbe000000,0x23fd1046
- .long 0x10000000,0x2224baed
- .long 0x8e000000,0x20709d33
- .long 0x80000000,0x1e535a2f
- .long 0x64000000,0x1cef904e
- .long 0x30000000,0x1b0d6398
- .long 0x24000000,0x1964ce7d
- .long 0x16000000,0x17b908bf
- .type L(_FPI), @object
- ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
- for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */
- .p2align 3
-L(DP_SIN2_0):
- .long 0x5543d49d,0xbfc55555
- .type L(DP_SIN2_0), @object
- ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
-
- .p2align 3
-L(DP_SIN2_1):
- .long 0x75cec8c5,0x3f8110f4
- .type L(DP_SIN2_1), @object
- ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
-
- .p2align 3
-L(DP_ZERONE):
- .long 0x00000000,0x00000000 /* 0.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ZERONE), @object
- ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
- .p2align 3
-L(DP_ONES):
- .long 0x00000000,0x3ff00000 /* +1.0 */
- .long 0x00000000,0xbff00000 /* -1.0 */
- .type L(DP_ONES), @object
- ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
- for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_S3):
- .long 0x64e6b5b4,0x3ec71d72
- .type L(DP_S3), @object
- ASM_SIZE_DIRECTIVE(L(DP_S3))
-
- .p2align 3
-L(DP_S1):
- .long 0x10c2688b,0x3f811111
- .type L(DP_S1), @object
- ASM_SIZE_DIRECTIVE(L(DP_S1))
-
- .p2align 3
-L(DP_S4):
- .long 0x1674b58a,0xbe5a947e
- .type L(DP_S4), @object
- ASM_SIZE_DIRECTIVE(L(DP_S4))
-
- .p2align 3
-L(DP_S2):
- .long 0x8b4bd1f9,0xbf2a019f
- .type L(DP_S2), @object
- ASM_SIZE_DIRECTIVE(L(DP_S2))
-
- .p2align 3
-L(DP_S0):
- .long 0x55551cd9,0xbfc55555
- .type L(DP_S0), @object
- ASM_SIZE_DIRECTIVE(L(DP_S0))
-
- .p2align 3
-L(DP_SMALL):
- .long 0x00000000,0x3cd00000 /* 2^(-50) */
- .type L(DP_SMALL), @object
- ASM_SIZE_DIRECTIVE(L(DP_SMALL))
-
-/* Coefficients of polynomial
- for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */
- .p2align 3
-L(DP_C3):
- .long 0x9ac43cc0,0x3efa00eb
- .type L(DP_C3), @object
- ASM_SIZE_DIRECTIVE(L(DP_C3))
-
- .p2align 3
-L(DP_C1):
- .long 0x545c50c7,0x3fa55555
- .type L(DP_C1), @object
- ASM_SIZE_DIRECTIVE(L(DP_C1))
-
- .p2align 3
-L(DP_C4):
- .long 0xdd8844d7,0xbe923c97
- .type L(DP_C4), @object
- ASM_SIZE_DIRECTIVE(L(DP_C4))
-
- .p2align 3
-L(DP_C2):
- .long 0x348b6874,0xbf56c16b
- .type L(DP_C2), @object
- ASM_SIZE_DIRECTIVE(L(DP_C2))
-
- .p2align 3
-L(DP_C0):
- .long 0xfffe98ae,0xbfdfffff
- .type L(DP_C0), @object
- ASM_SIZE_DIRECTIVE(L(DP_C0))
-
- .p2align 3
-L(DP_PIO4):
- .long 0x54442d18,0x3fe921fb /* Pi/4 */
- .type L(DP_PIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
- .p2align 3
-L(DP_2POW52):
- .long 0x00000000,0x43300000 /* +2^52 */
- .long 0x00000000,0xc3300000 /* -2^52 */
- .type L(DP_2POW52), @object
- ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
- .p2align 3
-L(DP_INVPIO4):
- .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */
- .type L(DP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
- .p2align 3
-L(DP_PIO4HI):
- .long 0x54000000,0xbfe921fb /* High part of -Pi/4 */
- .type L(DP_PIO4HI), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
- .p2align 3
-L(DP_PIO4LO):
- .long 0x11A62633,0xbe010b46 /* Low part of -Pi/4 */
- .type L(DP_PIO4LO), @object
- ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
- .p2align 2
-L(SP_INVPIO4):
- .long 0x3fa2f983 /* 4/Pi */
- .type L(SP_INVPIO4), @object
- ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
- .p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
- .long 0xffffffff,0x7fffffff
- .long 0xffffffff,0x7fffffff
- .type L(DP_ABS_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
- .p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
- .long 0x00000000,0xffffffff
- .type L(DP_HI_MASK), @object
- ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
-weak_alias (__sinf, sinf)
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c b/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
deleted file mode 100644
index 8ccdd2f34d..0000000000
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Multiple versions of sinf
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <init-arch.h>
-
-extern float __sinf_sse2 (float);
-extern float __sinf_ia32 (float);
-float __sinf (float);
-
-libm_ifunc (__sinf, HAS_CPU_FEATURE (SSE2) ? __sinf_sse2 : __sinf_ia32);
-weak_alias (__sinf, sinf);
-#define SINF __sinf_ia32
-#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/i386/i686/fpu/s_fmax.S b/sysdeps/i386/i686/fpu/s_fmax.S
deleted file mode 100644
index ace8db9410..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmax.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY(__fmax)
- fldl 4(%esp) // x
- fldl 12(%esp) // x : y
-
- fucomi %st(0), %st
- fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
-
- fxch
-
- fucomi %st(1), %st
- fcmovb %st(1), %st
-
- fstp %st(1)
-
- ret
-END(__fmax)
-weak_alias (__fmax, fmax)
diff --git a/sysdeps/i386/i686/fpu/s_fmaxf.S b/sysdeps/i386/i686/fpu/s_fmaxf.S
deleted file mode 100644
index 3a25951a09..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmaxf.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY(__fmaxf)
- flds 4(%esp) // x
- flds 8(%esp) // x : y
-
- fucomi %st(0), %st
- fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
-
- fxch
-
- fucomi %st(1), %st
- fcmovb %st(1), %st
-
- fstp %st(1)
-
- ret
-END(__fmaxf)
-weak_alias (__fmaxf, fmaxf)
diff --git a/sysdeps/i386/i686/fpu/s_fmaxl.S b/sysdeps/i386/i686/fpu/s_fmaxl.S
deleted file mode 100644
index 3f6c21c63d..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmaxl.S
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Compute maximum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY(__fmaxl)
- fldt 4(%esp) // x
- fldt 16(%esp) // x : y
-
- fucomi %st(1), %st
- jp 2f
- fcmovb %st(1), %st
-
- fstp %st(1)
-
- ret
-
-2: // Unordered.
- fucomi %st(0), %st
- jp 3f
- // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
- testb $0x40, 11(%esp)
- jz 4f
- fstp %st(1)
- ret
-
-3: // st(0) is a NaN; st(1) may or may not be.
- fxch
- fucomi %st(0), %st
- jp 4f
- // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
- testb $0x40, 23(%esp)
- jz 4f
- fstp %st(1)
- ret
-
-4: // Both arguments are NaNs, or one is a signaling NaN.
- faddp
- ret
-END(__fmaxl)
-weak_alias (__fmaxl, fmaxl)
diff --git a/sysdeps/i386/i686/fpu/s_fmin.S b/sysdeps/i386/i686/fpu/s_fmin.S
deleted file mode 100644
index 72d306fd79..0000000000
--- a/sysdeps/i386/i686/fpu/s_fmin.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY(__fmin)
- fldl 4(%esp) // x
- fldl 12(%esp) // x : y
-
- fucomi %st(0), %st
- fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
-
- fucomi %st(1), %st
- fcmovnb %st(1), %st
-
- fstp %st(1)
-
- ret
-END(__fmin)
-weak_alias (__fmin, fmin)
diff --git a/sysdeps/i386/i686/fpu/s_fminf.S b/sysdeps/i386/i686/fpu/s_fminf.S
deleted file mode 100644
index 52ea892bad..0000000000
--- a/sysdeps/i386/i686/fpu/s_fminf.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY(__fminf)
- flds 4(%esp) // x
- flds 8(%esp) // x : y
-
- fucomi %st(0), %st
- fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise
-
- fucomi %st(1), %st
- fcmovnb %st(1), %st
-
- fstp %st(1)
-
- ret
-END(__fminf)
-weak_alias (__fminf, fminf)
diff --git a/sysdeps/i386/i686/fpu/s_fminl.S b/sysdeps/i386/i686/fpu/s_fminl.S
deleted file mode 100644
index e1cb83fed7..0000000000
--- a/sysdeps/i386/i686/fpu/s_fminl.S
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Compute minimum of two numbers, regarding NaN as missing argument.
- Copyright (C) 1997-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY(__fminl)
- fldt 4(%esp) // x
- fldt 16(%esp) // x : y
-
- fucomi %st(1), %st
- jp 2f
- fcmovnb %st(1), %st
-
- fstp %st(1)
-
- ret
-
-2: // Unordered.
- fucomi %st(0), %st
- jp 3f
- // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
- testb $0x40, 11(%esp)
- jz 4f
- fstp %st(1)
- ret
-
-3: // st(0) is a NaN; st(1) may or may not be.
- fxch
- fucomi %st(0), %st
- jp 4f
- // st(1) is a NaN; st(0) is not. Test if st(1) is signaling.
- testb $0x40, 23(%esp)
- jz 4f
- fstp %st(1)
- ret
-
-4: // Both arguments are NaNs, or one is a signaling NaN.
- faddp
- ret
-END(__fminl)
-weak_alias (__fminl, fminl)
diff --git a/sysdeps/i386/i686/hp-timing.h b/sysdeps/i386/i686/hp-timing.h
deleted file mode 100644
index 1b11410feb..0000000000
--- a/sysdeps/i386/i686/hp-timing.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* High precision, low overhead timing functions. i686 version.
- Copyright (C) 1998-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _HP_TIMING_H
-#define _HP_TIMING_H 1
-
-/* We always assume having the timestamp register. */
-#define HP_TIMING_AVAIL (1)
-#define HP_SMALL_TIMING_AVAIL (1)
-
-/* We indeed have inlined functions. */
-#define HP_TIMING_INLINE (1)
-
-/* We use 64bit values for the times. */
-typedef unsigned long long int hp_timing_t;
-
-/* That's quite simple. Use the `rdtsc' instruction. Note that the value
- might not be 100% accurate since there might be some more instructions
- running in this moment. This could be changed by using a barrier like
- 'cpuid' right before the `rdtsc' instruciton. But we are not interested
- in accurate clock cycles here so we don't do this. */
-#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
-
-#include <hp-timing-common.h>
-
-#endif /* hp-timing.h */
diff --git a/sysdeps/i386/i686/init-arch.h b/sysdeps/i386/i686/init-arch.h
deleted file mode 100644
index f55f80efa0..0000000000
--- a/sysdeps/i386/i686/init-arch.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright (C) 2015-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define MINIMUM_ISA 686
-#include <sysdeps/x86/init-arch.h>
diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S
deleted file mode 100644
index 5140ee2145..0000000000
--- a/sysdeps/i386/i686/memcmp.S
+++ /dev/null
@@ -1,408 +0,0 @@
-/* Compare two memory blocks for differences in the first COUNT bytes.
- Copyright (C) 2004-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS 4+4 /* Preserve EBX. */
-#define BLK1 PARMS
-#define BLK2 BLK1+4
-#define LEN BLK2+4
-#define ENTRANCE pushl %ebx; cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (ebx, 0)
-#define RETURN popl %ebx; cfi_adjust_cfa_offset (-4); \
- cfi_restore (ebx); ret
-
-/* Load an entry in a jump table into EBX. TABLE is a jump table
- with relative offsets. INDEX is a register contains the index
- into the jump table. */
-#define LOAD_JUMP_TABLE_ENTRY(TABLE, INDEX) \
- /* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ebx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ebx,INDEX,4), %ebx
-
- .text
- ALIGN (4)
-ENTRY (memcmp)
- ENTRANCE
-
- movl BLK1(%esp), %eax
- movl BLK2(%esp), %edx
- movl LEN(%esp), %ecx
-
- cmpl $1, %ecx
- jne L(not_1)
- movzbl (%eax), %ecx /* LEN == 1 */
- cmpb (%edx), %cl
- jne L(neq)
-L(bye):
- xorl %eax, %eax
- RETURN
-
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
-L(neq):
- sbbl %eax, %eax
- sbbl $-1, %eax
- RETURN
-
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (ebx, 0)
-L(not_1):
- jl L(bye) /* LEN == 0 */
-
- pushl %esi
- cfi_adjust_cfa_offset (4)
- movl %eax, %esi
- cfi_rel_offset (esi, 0)
- cmpl $32, %ecx;
- jge L(32bytesormore) /* LEN => 32 */
-
- LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
- addl %ecx, %edx
- addl %ecx, %esi
- jmp *%ebx
-
- ALIGN (4)
-L(28bytes):
- movl -28(%esi), %eax
- movl -28(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(24bytes):
- movl -24(%esi), %eax
- movl -24(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(20bytes):
- movl -20(%esi), %eax
- movl -20(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(16bytes):
- movl -16(%esi), %eax
- movl -16(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(12bytes):
- movl -12(%esi), %eax
- movl -12(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(8bytes):
- movl -8(%esi), %eax
- movl -8(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(4bytes):
- movl -4(%esi), %eax
- movl -4(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(0bytes):
- popl %esi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (esi)
- xorl %eax, %eax
- RETURN
-
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (esi, 0)
- cfi_rel_offset (ebx, 4)
-L(29bytes):
- movl -29(%esi), %eax
- movl -29(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(25bytes):
- movl -25(%esi), %eax
- movl -25(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(21bytes):
- movl -21(%esi), %eax
- movl -21(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(17bytes):
- movl -17(%esi), %eax
- movl -17(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(13bytes):
- movl -13(%esi), %eax
- movl -13(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(9bytes):
- movl -9(%esi), %eax
- movl -9(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(5bytes):
- movl -5(%esi), %eax
- movl -5(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(1bytes):
- movzbl -1(%esi), %eax
- cmpb -1(%edx), %al
- jne L(set)
- popl %esi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (esi)
- xorl %eax, %eax
- RETURN
-
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (esi, 0)
- cfi_rel_offset (ebx, 4)
-L(30bytes):
- movl -30(%esi), %eax
- movl -30(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(26bytes):
- movl -26(%esi), %eax
- movl -26(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(22bytes):
- movl -22(%esi), %eax
- movl -22(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(18bytes):
- movl -18(%esi), %eax
- movl -18(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(14bytes):
- movl -14(%esi), %eax
- movl -14(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(10bytes):
- movl -10(%esi), %eax
- movl -10(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(6bytes):
- movl -6(%esi), %eax
- movl -6(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(2bytes):
- movzwl -2(%esi), %eax
- movzwl -2(%edx), %ecx
- cmpb %cl, %al
- jne L(set)
- cmpl %ecx, %eax
- jne L(set)
- popl %esi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (esi)
- xorl %eax, %eax
- RETURN
-
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (esi, 0)
- cfi_rel_offset (ebx, 4)
-L(31bytes):
- movl -31(%esi), %eax
- movl -31(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(27bytes):
- movl -27(%esi), %eax
- movl -27(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(23bytes):
- movl -23(%esi), %eax
- movl -23(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(19bytes):
- movl -19(%esi), %eax
- movl -19(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(15bytes):
- movl -15(%esi), %eax
- movl -15(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(11bytes):
- movl -11(%esi), %eax
- movl -11(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(7bytes):
- movl -7(%esi), %eax
- movl -7(%edx), %ecx
- cmpl %ecx, %eax
- jne L(find_diff)
-L(3bytes):
- movzwl -3(%esi), %eax
- movzwl -3(%edx), %ecx
- cmpb %cl, %al
- jne L(set)
- cmpl %ecx, %eax
- jne L(set)
- movzbl -1(%esi), %eax
- cmpb -1(%edx), %al
- jne L(set)
- popl %esi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (esi)
- xorl %eax, %eax
- RETURN
-
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (esi, 0)
- cfi_rel_offset (ebx, 4)
- ALIGN (4)
-/* ECX >= 32. */
-L(32bytesormore):
- subl $32, %ecx
-
- movl (%esi), %eax
- cmpl (%edx), %eax
- jne L(load_ecx)
-
- movl 4(%esi), %eax
- cmpl 4(%edx), %eax
- jne L(load_ecx_4)
-
- movl 8(%esi), %eax
- cmpl 8(%edx), %eax
- jne L(load_ecx_8)
-
- movl 12(%esi), %eax
- cmpl 12(%edx), %eax
- jne L(load_ecx_12)
-
- movl 16(%esi), %eax
- cmpl 16(%edx), %eax
- jne L(load_ecx_16)
-
- movl 20(%esi), %eax
- cmpl 20(%edx), %eax
- jne L(load_ecx_20)
-
- movl 24(%esi), %eax
- cmpl 24(%edx), %eax
- jne L(load_ecx_24)
-
- movl 28(%esi), %eax
- cmpl 28(%edx), %eax
- jne L(load_ecx_28)
-
- addl $32, %esi
- addl $32, %edx
- cmpl $32, %ecx
- jge L(32bytesormore)
-
- LOAD_JUMP_TABLE_ENTRY (L(table_32bytes), %ecx)
- addl %ecx, %edx
- addl %ecx, %esi
- jmp *%ebx
-
-L(load_ecx_28):
- addl $0x4, %edx
-L(load_ecx_24):
- addl $0x4, %edx
-L(load_ecx_20):
- addl $0x4, %edx
-L(load_ecx_16):
- addl $0x4, %edx
-L(load_ecx_12):
- addl $0x4, %edx
-L(load_ecx_8):
- addl $0x4, %edx
-L(load_ecx_4):
- addl $0x4, %edx
-L(load_ecx):
- movl (%edx), %ecx
-
-L(find_diff):
- cmpb %cl, %al
- jne L(set)
- cmpb %ch, %ah
- jne L(set)
- shrl $16,%eax
- shrl $16,%ecx
- cmpb %cl, %al
- jne L(set)
- /* We get there only if we already know there is a
- difference. */
- cmpl %ecx, %eax
-L(set):
- sbbl %eax, %eax
- sbbl $-1, %eax
- popl %esi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (esi)
- RETURN
-END (memcmp)
-
- .section .rodata
- ALIGN (2)
-L(table_32bytes) :
- .long L(0bytes) - L(table_32bytes)
- .long L(1bytes) - L(table_32bytes)
- .long L(2bytes) - L(table_32bytes)
- .long L(3bytes) - L(table_32bytes)
- .long L(4bytes) - L(table_32bytes)
- .long L(5bytes) - L(table_32bytes)
- .long L(6bytes) - L(table_32bytes)
- .long L(7bytes) - L(table_32bytes)
- .long L(8bytes) - L(table_32bytes)
- .long L(9bytes) - L(table_32bytes)
- .long L(10bytes) - L(table_32bytes)
- .long L(11bytes) - L(table_32bytes)
- .long L(12bytes) - L(table_32bytes)
- .long L(13bytes) - L(table_32bytes)
- .long L(14bytes) - L(table_32bytes)
- .long L(15bytes) - L(table_32bytes)
- .long L(16bytes) - L(table_32bytes)
- .long L(17bytes) - L(table_32bytes)
- .long L(18bytes) - L(table_32bytes)
- .long L(19bytes) - L(table_32bytes)
- .long L(20bytes) - L(table_32bytes)
- .long L(21bytes) - L(table_32bytes)
- .long L(22bytes) - L(table_32bytes)
- .long L(23bytes) - L(table_32bytes)
- .long L(24bytes) - L(table_32bytes)
- .long L(25bytes) - L(table_32bytes)
- .long L(26bytes) - L(table_32bytes)
- .long L(27bytes) - L(table_32bytes)
- .long L(28bytes) - L(table_32bytes)
- .long L(29bytes) - L(table_32bytes)
- .long L(30bytes) - L(table_32bytes)
- .long L(31bytes) - L(table_32bytes)
-
-
-#undef bcmp
-weak_alias (memcmp, bcmp)
-libc_hidden_builtin_def (memcmp)
diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
deleted file mode 100644
index 1d61447430..0000000000
--- a/sysdeps/i386/i686/memcpy.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/* Copy memory block and return pointer to beginning of destination block
- For Intel 80x86, x>=6.
- This file is part of the GNU C Library.
- Copyright (C) 1999-2017 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS 4 /* no space for saved regs */
-#define RTN PARMS
-#define DEST RTN
-#define SRC DEST+4
-#define LEN SRC+4
-
- .text
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memcpy_chk)
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (__memcpy_chk)
-#endif
-ENTRY (memcpy)
-
- movl %edi, %eax
- movl DEST(%esp), %edi
- movl %esi, %edx
- movl SRC(%esp), %esi
-
- movl %edi, %ecx
- xorl %esi, %ecx
- andl $3, %ecx
- movl LEN(%esp), %ecx
- cld
- jne .Lunaligned
-
- cmpl $3, %ecx
- jbe .Lunaligned
-
- testl $3, %esi
- je 1f
- movsb
- decl %ecx
- testl $3, %esi
- je 1f
- movsb
- decl %ecx
- testl $3, %esi
- je 1f
- movsb
- decl %ecx
-1: pushl %eax
- movl %ecx, %eax
- shrl $2, %ecx
- andl $3, %eax
- rep
- movsl
- movl %eax, %ecx
- rep
- movsb
- popl %eax
-
-.Lend: movl %eax, %edi
- movl %edx, %esi
- movl DEST(%esp), %eax
-
- ret
-
- /* When we come here the pointers do not have the same
- alignment or the length is too short. No need to optimize for
- aligned memory accesses. */
-.Lunaligned:
- shrl $1, %ecx
- jnc 1f
- movsb
-1: shrl $1, %ecx
- jnc 2f
- movsw
-2: rep
- movsl
- jmp .Lend
-END (memcpy)
-libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
deleted file mode 100644
index f60c3d501b..0000000000
--- a/sysdeps/i386/i686/memmove.S
+++ /dev/null
@@ -1,120 +0,0 @@
-/* Copy memory block and return pointer to beginning of destination block
- For Intel 80x86, x>=6.
- This file is part of the GNU C Library.
- Copyright (C) 2003-2017 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 2003.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS 4+4 /* one spilled register */
-#define RTN PARMS
-
- .text
-
-#ifdef USE_AS_BCOPY
-# define SRC RTN
-# define DEST SRC+4
-# define LEN DEST+4
-#else
-# define DEST RTN
-# define SRC DEST+4
-# define LEN SRC+4
-
-# if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memmove_chk)
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (__memmove_chk)
-# endif
-#endif
-
-ENTRY (memmove)
-
- pushl %edi
- cfi_adjust_cfa_offset (4)
-
- movl LEN(%esp), %ecx
- movl DEST(%esp), %edi
- cfi_rel_offset (edi, 0)
- movl %esi, %edx
- movl SRC(%esp), %esi
- cfi_register (esi, edx)
-
- movl %edi, %eax
- subl %esi, %eax
- cmpl %eax, %ecx
- ja 3f
-
- cld
- shrl $1, %ecx
- jnc 1f
- movsb
-1: shrl $1, %ecx
- jnc 2f
- movsw
-2: rep
- movsl
- movl %edx, %esi
- cfi_restore (esi)
-#ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-#endif
-
- popl %edi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (edi)
-
- ret
-
- cfi_adjust_cfa_offset (4)
- cfi_rel_offset (edi, 0)
- cfi_register (esi, edx)
-
- /* Backward copying. */
-3: std
- leal -1(%edi, %ecx), %edi
- leal -1(%esi, %ecx), %esi
- shrl $1, %ecx
- jnc 1f
- movsb
-1: subl $1, %edi
- subl $1, %esi
- shrl $1, %ecx
- jnc 2f
- movsw
-2: subl $2, %edi
- subl $2, %esi
- rep
- movsl
- movl %edx, %esi
- cfi_restore (esi)
-#ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-#endif
-
- cld
- popl %edi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (edi)
-
- ret
-END (memmove)
-#ifndef USE_AS_BCOPY
-libc_hidden_builtin_def (memmove)
-#endif
diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S
deleted file mode 100644
index 31cb4efdb2..0000000000
--- a/sysdeps/i386/i686/mempcpy.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copy memory block and return pointer to following byte.
- For Intel 80x86, x>=6.
- This file is part of the GNU C Library.
- Copyright (C) 1998-2017 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS 4 /* no space for saved regs */
-#define RTN PARMS
-#define DEST RTN
-#define SRC DEST+4
-#define LEN SRC+4
-
- .text
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__mempcpy_chk)
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (__mempcpy_chk)
-#endif
-ENTRY (__mempcpy)
-
- movl LEN(%esp), %ecx
- movl %edi, %eax
- cfi_register (edi, eax)
- movl DEST(%esp), %edi
- movl %esi, %edx
- cfi_register (esi, edx)
- movl SRC(%esp), %esi
- cld
- shrl $1, %ecx
- jnc 1f
- movsb
-1: shrl $1, %ecx
- jnc 2f
- movsw
-2: rep
- movsl
- xchgl %edi, %eax
- cfi_restore (edi)
- movl %edx, %esi
- cfi_restore (esi)
-
- ret
-END (__mempcpy)
-libc_hidden_def (__mempcpy)
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
deleted file mode 100644
index 24d06178d2..0000000000
--- a/sysdeps/i386/i686/memset.S
+++ /dev/null
@@ -1,100 +0,0 @@
-/* memset/bzero -- set memory area to CH/0
- Highly optimized version for ix86, x>=6.
- Copyright (C) 1999-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS 4+4 /* space for 1 saved reg */
-#ifdef USE_AS_BZERO
-# define DEST PARMS
-# define LEN DEST+4
-#else
-# define RTN PARMS
-# define DEST RTN
-# define CHR DEST+4
-# define LEN CHR+4
-#endif
-
- .text
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
-ENTRY_CHK (__memset_chk)
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (__memset_chk)
-#endif
-ENTRY (memset)
-
- cld
- pushl %edi
- cfi_adjust_cfa_offset (4)
- movl DEST(%esp), %edx
- movl LEN(%esp), %ecx
-#ifdef USE_AS_BZERO
- xorl %eax, %eax /* fill with 0 */
-#else
- movzbl CHR(%esp), %eax
-#endif
- jecxz 1f
- movl %edx, %edi
- cfi_rel_offset (edi, 0)
- andl $3, %edx
- jz 2f /* aligned */
- jp 3f /* misaligned at 3, store just one byte below */
- stosb /* misaligned at 1 or 2, store two bytes */
- decl %ecx
- jz 1f
-3: stosb
- decl %ecx
- jz 1f
- xorl $1, %edx
- jnz 2f /* was misaligned at 2 or 3, now aligned */
- stosb /* was misaligned at 1, store third byte */
- decl %ecx
-2: movl %ecx, %edx
- shrl $2, %ecx
- andl $3, %edx
-#ifndef USE_AS_BZERO
- imul $0x01010101, %eax
-#endif
- rep
- stosl
- movl %edx, %ecx
- rep
- stosb
-
-1:
-#ifndef USE_AS_BZERO
- movl DEST(%esp), %eax /* start address of destination is result */
-#endif
- popl %edi
- cfi_adjust_cfa_offset (-4)
- cfi_restore (edi)
-
- ret
-END (memset)
-libc_hidden_builtin_def (memset)
-
-#if defined SHARED && IS_IN (libc) && !defined __memset_chk \
- && !defined USE_AS_BZERO
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
- .section .gnu.warning.__memset_zero_constant_len_parameter
- .string "memset used with constant zero length parameter; this could be due to transposed parameters"
-#endif
diff --git a/sysdeps/i386/i686/memusage.h b/sysdeps/i386/i686/memusage.h
deleted file mode 100644
index 77a020d7c0..0000000000
--- a/sysdeps/i386/i686/memusage.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright (C) 2000-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define GETSP() ({ register uintptr_t stack_ptr asm ("esp"); stack_ptr; })
-#define GETTIME(low,high) asm ("rdtsc" : "=a" (low), "=d" (high))
-
-#include <sysdeps/generic/memusage.h>
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
deleted file mode 100644
index 4a0c20c051..0000000000
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ /dev/null
@@ -1,44 +0,0 @@
-ifeq ($(subdir),csu)
-tests += test-multiarch
-endif
-
-ifeq ($(subdir),string)
-gen-as-const-headers += locale-defines.sym
-sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
- memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
- memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
- memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
- strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
- memcmp-ssse3 memcmp-sse4 varshift \
- strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
- strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
- strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
- strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
- strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
- memchr-sse2 memchr-sse2-bsf \
- memrchr-sse2 memrchr-sse2-bsf memrchr-c \
- rawmemchr-sse2 rawmemchr-sse2-bsf \
- strnlen-sse2 strnlen-c \
- strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
- strncase_l-c strncase-c strncase_l-ssse3 \
- strcasecmp_l-sse4 strncase_l-sse4 \
- bcopy-sse2-unaligned memcpy-sse2-unaligned \
- mempcpy-sse2-unaligned memmove-sse2-unaligned \
- strcspn-c strpbrk-c strspn-c
-CFLAGS-varshift.c += -msse4
-CFLAGS-strcspn-c.c += -msse4
-CFLAGS-strpbrk-c.c += -msse4
-CFLAGS-strspn-c.c += -msse4
-endif
-
-ifeq ($(subdir),wcsmbs)
-sysdep_routines += wcscmp-sse2 wcscmp-c wcslen-sse2 wcslen-c \
- wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcschr-sse2 \
- wcschr-c wcsrchr-sse2 wcsrchr-c wcscpy-ssse3 wcscpy-c
-endif
-
-ifeq ($(subdir),math)
-libm-sysdep_routines += s_fma-fma s_fmaf-fma
-CFLAGS-s_fma-fma.c += -mavx -mfpmath=sse
-CFLAGS-s_fmaf-fma.c += -mavx -mfpmath=sse
-endif
diff --git a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
deleted file mode 100644
index efef2a10dd..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define USE_AS_BCOPY
-#define MEMCPY __bcopy_sse2_unaligned
-#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
deleted file mode 100644
index cbc8b420e8..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy-ssse3-rep.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define USE_AS_BCOPY
-#define MEMCPY __bcopy_ssse3_rep
-#include "memcpy-ssse3-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S b/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
deleted file mode 100644
index 36aac44b9c..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE
-#define USE_AS_BCOPY
-#define MEMCPY __bcopy_ssse3
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S
deleted file mode 100644
index 877f82c28f..0000000000
--- a/sysdeps/i386/i686/multiarch/bcopy.S
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Multiple versions of bcopy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
- .text
-ENTRY(bcopy)
- .type bcopy, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__bcopy_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__bcopy_sse2_unaligned)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- LOAD_FUNC_GOT_EAX (__bcopy_ssse3)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f
- LOAD_FUNC_GOT_EAX (__bcopy_ssse3_rep)
-2: ret
-END(bcopy)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __bcopy_ia32, @function; \
- .p2align 4; \
- .globl __bcopy_ia32; \
- .hidden __bcopy_ia32; \
- __bcopy_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32
-
-#endif
-
-#include "../bcopy.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
deleted file mode 100644
index 507b288bb3..0000000000
--- a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_BZERO
-#define __memset_sse2_rep __bzero_sse2_rep
-#include "memset-sse2-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S
deleted file mode 100644
index 8d04512e4e..0000000000
--- a/sysdeps/i386/i686/multiarch/bzero-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_BZERO
-#define __memset_sse2 __bzero_sse2
-#include "memset-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S
deleted file mode 100644
index 9dac490aa2..0000000000
--- a/sysdeps/i386/i686/multiarch/bzero.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Multiple versions of bzero
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
- .text
-ENTRY(__bzero)
- .type __bzero, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__bzero_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX ( __bzero_sse2)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f
- LOAD_FUNC_GOT_EAX (__bzero_sse2_rep)
-2: ret
-END(__bzero)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __bzero_ia32, @function; \
- .p2align 4; \
- .globl __bzero_ia32; \
- .hidden __bzero_ia32; \
- __bzero_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __bzero_ia32, .-__bzero_ia32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI___bzero; __GI___bzero = __bzero_ia32
-# endif
-#endif
-
-#include "../bzero.S"
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
deleted file mode 100644
index e8026a2a78..0000000000
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ /dev/null
@@ -1,376 +0,0 @@
-/* Enumerate available IFUNC implementations of a function. i686 version.
- Copyright (C) 2012-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <assert.h>
-#include <string.h>
-#include <wchar.h>
-#include <ifunc-impl-list.h>
-#include "init-arch.h"
-
-/* Maximum number of IFUNC implementations. */
-#define MAX_IFUNC 4
-
-/* Fill ARRAY of MAX elements with IFUNC implementations for function
- NAME and return the number of valid entries. */
-
-size_t
-__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
- size_t max)
-{
- assert (max >= MAX_IFUNC);
-
- size_t i = 0;
-
- /* Support sysdeps/i386/i686/multiarch/bcopy.S. */
- IFUNC_IMPL (i, name, bcopy,
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
- __bcopy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSSE3),
- __bcopy_ssse3)
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2),
- __bcopy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/bzero.S. */
- IFUNC_IMPL (i, name, bzero,
- IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
- __bzero_sse2_rep)
- IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
- __bzero_sse2)
- IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memchr.S. */
- IFUNC_IMPL (i, name, memchr,
- IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
- __memchr_sse2_bsf)
- IFUNC_IMPL_ADD (array, i, memchr, HAS_CPU_FEATURE (SSE2),
- __memchr_sse2)
- IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memcmp.S. */
- IFUNC_IMPL (i, name, memcmp,
- IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_2),
- __memcmp_sse4_2)
- IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
- __memcmp_ssse3)
- IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */
- IFUNC_IMPL (i, name, __memmove_chk,
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- HAS_CPU_FEATURE (SSSE3),
- __memmove_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- HAS_CPU_FEATURE (SSSE3),
- __memmove_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memmove_chk,
- HAS_CPU_FEATURE (SSE2),
- __memmove_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memmove.S. */
- IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
- __memmove_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
- __memmove_ssse3)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2),
- __memmove_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memrchr.S. */
- IFUNC_IMPL (i, name, memrchr,
- IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
- __memrchr_sse2_bsf)
- IFUNC_IMPL_ADD (array, i, memrchr, HAS_CPU_FEATURE (SSE2),
- __memrchr_sse2)
- IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memset_chk.S. */
- IFUNC_IMPL (i, name, __memset_chk,
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- HAS_CPU_FEATURE (SSE2),
- __memset_chk_sse2_rep)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- HAS_CPU_FEATURE (SSE2),
- __memset_chk_sse2)
- IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
- __memset_chk_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memset.S. */
- IFUNC_IMPL (i, name, memset,
- IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
- __memset_sse2_rep)
- IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
- __memset_sse2)
- IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/rawmemchr.S. */
- IFUNC_IMPL (i, name, rawmemchr,
- IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
- __rawmemchr_sse2_bsf)
- IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_CPU_FEATURE (SSE2),
- __rawmemchr_sse2)
- IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/stpncpy.S. */
- IFUNC_IMPL (i, name, stpncpy,
- IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3),
- __stpncpy_ssse3)
- IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSE2),
- __stpncpy_sse2)
- IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/stpcpy.S. */
- IFUNC_IMPL (i, name, stpcpy,
- IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3),
- __stpcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSE2),
- __stpcpy_sse2)
- IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strcasecmp.S. */
- IFUNC_IMPL (i, name, strcasecmp,
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- HAS_CPU_FEATURE (SSE4_2),
- __strcasecmp_sse4_2)
- IFUNC_IMPL_ADD (array, i, strcasecmp,
- HAS_CPU_FEATURE (SSSE3),
- __strcasecmp_ssse3)
- IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strcasecmp_l.S. */
- IFUNC_IMPL (i, name, strcasecmp_l,
- IFUNC_IMPL_ADD (array, i, strcasecmp_l,
- HAS_CPU_FEATURE (SSE4_2),
- __strcasecmp_l_sse4_2)
- IFUNC_IMPL_ADD (array, i, strcasecmp_l,
- HAS_CPU_FEATURE (SSSE3),
- __strcasecmp_l_ssse3)
- IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
- __strcasecmp_l_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strcat.S. */
- IFUNC_IMPL (i, name, strcat,
- IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
- __strcat_ssse3)
- IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSE2),
- __strcat_sse2)
- IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strchr.S. */
- IFUNC_IMPL (i, name, strchr,
- IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
- __strchr_sse2_bsf)
- IFUNC_IMPL_ADD (array, i, strchr, HAS_CPU_FEATURE (SSE2),
- __strchr_sse2)
- IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strcmp.S. */
- IFUNC_IMPL (i, name, strcmp,
- IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
- __strcmp_sse4_2)
- IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
- __strcmp_ssse3)
- IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strcpy.S. */
- IFUNC_IMPL (i, name, strcpy,
- IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
- __strcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSE2),
- __strcpy_sse2)
- IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strcspn.S. */
- IFUNC_IMPL (i, name, strcspn,
- IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2),
- __strcspn_sse42)
- IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strncase.S. */
- IFUNC_IMPL (i, name, strncasecmp,
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- HAS_CPU_FEATURE (SSE4_2),
- __strncasecmp_sse4_2)
- IFUNC_IMPL_ADD (array, i, strncasecmp,
- HAS_CPU_FEATURE (SSSE3),
- __strncasecmp_ssse3)
- IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
- __strncasecmp_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strncase_l.S. */
- IFUNC_IMPL (i, name, strncasecmp_l,
- IFUNC_IMPL_ADD (array, i, strncasecmp_l,
- HAS_CPU_FEATURE (SSE4_2),
- __strncasecmp_l_sse4_2)
- IFUNC_IMPL_ADD (array, i, strncasecmp_l,
- HAS_CPU_FEATURE (SSSE3),
- __strncasecmp_l_ssse3)
- IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
- __strncasecmp_l_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strncat.S. */
- IFUNC_IMPL (i, name, strncat,
- IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
- __strncat_ssse3)
- IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSE2),
- __strncat_sse2)
- IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strncpy.S. */
- IFUNC_IMPL (i, name, strncpy,
- IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
- __strncpy_ssse3)
- IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSE2),
- __strncpy_sse2)
- IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strnlen.S. */
- IFUNC_IMPL (i, name, strnlen,
- IFUNC_IMPL_ADD (array, i, strnlen, HAS_CPU_FEATURE (SSE2),
- __strnlen_sse2)
- IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strpbrk.S. */
- IFUNC_IMPL (i, name, strpbrk,
- IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2),
- __strpbrk_sse42)
- IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strrchr.S. */
- IFUNC_IMPL (i, name, strrchr,
- IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
- __strrchr_sse2_bsf)
- IFUNC_IMPL_ADD (array, i, strrchr, HAS_CPU_FEATURE (SSE2),
- __strrchr_sse2)
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strspn.S. */
- IFUNC_IMPL (i, name, strspn,
- IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2),
- __strspn_sse42)
- IFUNC_IMPL_ADD (array, i, strspn, 1, __strspn_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/wcschr.S. */
- IFUNC_IMPL (i, name, wcschr,
- IFUNC_IMPL_ADD (array, i, wcschr, HAS_CPU_FEATURE (SSE2),
- __wcschr_sse2)
- IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/wcscmp.S. */
- IFUNC_IMPL (i, name, wcscmp,
- IFUNC_IMPL_ADD (array, i, wcscmp, HAS_CPU_FEATURE (SSE2),
- __wcscmp_sse2)
- IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/wcscpy.S. */
- IFUNC_IMPL (i, name, wcscpy,
- IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
- __wcscpy_ssse3)
- IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/wcslen.S. */
- IFUNC_IMPL (i, name, wcslen,
- IFUNC_IMPL_ADD (array, i, wcslen, HAS_CPU_FEATURE (SSE2),
- __wcslen_sse2)
- IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/wcsrchr.S. */
- IFUNC_IMPL (i, name, wcsrchr,
- IFUNC_IMPL_ADD (array, i, wcsrchr, HAS_CPU_FEATURE (SSE2),
- __wcsrchr_sse2)
- IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/wmemcmp.S. */
- IFUNC_IMPL (i, name, wmemcmp,
- IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_2),
- __wmemcmp_sse4_2)
- IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
- __wmemcmp_ssse3)
- IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_ia32))
-
-#ifdef SHARED
- /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */
- IFUNC_IMPL (i, name, __memcpy_chk,
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- HAS_CPU_FEATURE (SSSE3),
- __memcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- HAS_CPU_FEATURE (SSSE3),
- __memcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk,
- HAS_CPU_FEATURE (SSE2),
- __memcpy_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/memcpy.S. */
- IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
- __memcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
- __memcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2),
- __memcpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */
- IFUNC_IMPL (i, name, __mempcpy_chk,
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- HAS_CPU_FEATURE (SSSE3),
- __mempcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- HAS_CPU_FEATURE (SSSE3),
- __mempcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
- HAS_CPU_FEATURE (SSE2),
- __mempcpy_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/mempcpy.S. */
- IFUNC_IMPL (i, name, mempcpy,
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
- __mempcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
- __mempcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2),
- __mempcpy_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strlen.S. */
- IFUNC_IMPL (i, name, strlen,
- IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
- __strlen_sse2_bsf)
- IFUNC_IMPL_ADD (array, i, strlen, HAS_CPU_FEATURE (SSE2),
- __strlen_sse2)
- IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_ia32))
-
- /* Support sysdeps/i386/i686/multiarch/strncmp.S. */
- IFUNC_IMPL (i, name, strncmp,
- IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
- __strncmp_sse4_2)
- IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
- __strncmp_ssse3)
- IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_ia32))
-#endif
-
- return i;
-}
diff --git a/sysdeps/i386/i686/multiarch/locale-defines.sym b/sysdeps/i386/i686/multiarch/locale-defines.sym
deleted file mode 100644
index aebff9a4f9..0000000000
--- a/sysdeps/i386/i686/multiarch/locale-defines.sym
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <locale/localeinfo.h>
-#include <langinfo.h>
-#include <stddef.h>
-
---
-
-LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales)
-LC_CTYPE
-_NL_CTYPE_NONASCII_CASE
-LOCALE_DATA_VALUES offsetof (struct __locale_data, values)
-SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0])
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
deleted file mode 100644
index dd316486e6..0000000000
--- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
+++ /dev/null
@@ -1,502 +0,0 @@
-/* Optimized memchr with sse2
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 4
-# define STR1 PARMS
-# define STR2 STR1+4
-
-# ifndef USE_AS_RAWMEMCHR
-# define LEN STR2+4
-# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
-# endif
-
-# ifndef MEMCHR
-# define MEMCHR __memchr_sse2_bsf
-# endif
-
- .text
-ENTRY (MEMCHR)
-
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
-
-# ifndef USE_AS_RAWMEMCHR
- mov LEN(%esp), %edx
- test %edx, %edx
- jz L(return_null_1)
-# endif
- mov %ecx, %eax
-
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
-
- and $63, %ecx
- pshufd $0, %xmm1, %xmm1
-
- cmp $48, %ecx
- ja L(crosscache)
-
- movdqu (%eax), %xmm0
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- je L(unaligned_no_match_1)
-/* Check which byte is a match. */
- bsf %ecx, %ecx
-
-# ifndef USE_AS_RAWMEMCHR
- sub %ecx, %edx
- jbe L(return_null_1)
-# endif
- add %ecx, %eax
- ret
-
- .p2align 4
-L(unaligned_no_match_1):
-# ifndef USE_AS_RAWMEMCHR
- sub $16, %edx
- jbe L(return_null_1)
- PUSH (%edi)
- lea 16(%eax), %edi
- and $15, %eax
- and $-16, %edi
- add %eax, %edx
-# else
- lea 16(%eax), %edx
- and $-16, %edx
-# endif
- jmp L(loop_prolog)
-
- .p2align 4
-L(return_null_1):
- xor %eax, %eax
- ret
-
-# ifndef USE_AS_RAWMEMCHR
- CFI_POP (%edi)
-# endif
-
- .p2align 4
-L(crosscache):
-/* Handle unaligned string. */
-
-# ifndef USE_AS_RAWMEMCHR
- PUSH (%edi)
- mov %eax, %edi
- and $15, %ecx
- and $-16, %edi
- movdqa (%edi), %xmm0
-# else
- mov %eax, %edx
- and $15, %ecx
- and $-16, %edx
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
-/* Remove the leading bytes. */
- sar %cl, %eax
- test %eax, %eax
- je L(unaligned_no_match)
-/* Check which byte is a match. */
- bsf %eax, %eax
-
-# ifndef USE_AS_RAWMEMCHR
- sub %eax, %edx
- jbe L(return_null)
- add %edi, %eax
- add %ecx, %eax
- RETURN
-# else
- add %edx, %eax
- add %ecx, %eax
- ret
-# endif
-
- .p2align 4
-L(unaligned_no_match):
-# ifndef USE_AS_RAWMEMCHR
- /* Calculate the last acceptable address and check for possible
- addition overflow by using saturated math:
- edx = ecx + edx
- edx |= -(edx < ecx) */
- add %ecx, %edx
- sbb %eax, %eax
- or %eax, %edx
- sub $16, %edx
- jbe L(return_null)
- add $16, %edi
-# else
- add $16, %edx
-# endif
-
- .p2align 4
-/* Loop start on aligned string. */
-L(loop_prolog):
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edx
- jbe L(exit_loop)
- movdqa (%edi), %xmm0
-# else
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 16(%edi), %xmm2
-# else
- movdqa 16(%edx), %xmm2
-# endif
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 48(%edi), %xmm4
-# else
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm4
-
-# ifndef USE_AS_RAWMEMCHR
- add $64, %edi
-# else
- add $64, %edx
-# endif
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
-# ifndef USE_AS_RAWMEMCHR
- test $0x3f, %edi
-# else
- test $0x3f, %edx
-# endif
- jz L(align64_loop)
-
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edx
- jbe L(exit_loop)
- movdqa (%edi), %xmm0
-# else
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 16(%edi), %xmm2
-# else
- movdqa 16(%edx), %xmm2
-# endif
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 48(%edi), %xmm3
-# else
- movdqa 48(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
-
-# ifndef USE_AS_RAWMEMCHR
- add $64, %edi
-# else
- add $64, %edx
-# endif
- test %eax, %eax
- jnz L(matches0)
-
-# ifndef USE_AS_RAWMEMCHR
- mov %edi, %ecx
- and $-64, %edi
- and $63, %ecx
- add %ecx, %edx
-# else
- and $-64, %edx
-# endif
-
- .p2align 4
-L(align64_loop):
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edx
- jbe L(exit_loop)
- movdqa (%edi), %xmm0
- movdqa 16(%edi), %xmm2
- movdqa 32(%edi), %xmm3
- movdqa 48(%edi), %xmm4
-# else
- movdqa (%edx), %xmm0
- movdqa 16(%edx), %xmm2
- movdqa 32(%edx), %xmm3
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
- pmovmskb %xmm4, %eax
-
-# ifndef USE_AS_RAWMEMCHR
- add $64, %edi
-# else
- add $64, %edx
-# endif
-
- test %eax, %eax
- jz L(align64_loop)
-
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edi
-# else
- sub $64, %edx
-# endif
-
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
-
- pcmpeqb %xmm1, %xmm3
-
-# ifndef USE_AS_RAWMEMCHR
- pcmpeqb 48(%edi), %xmm1
-# else
- pcmpeqb 48(%edx), %xmm1
-# endif
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- pmovmskb %xmm1, %eax
- bsf %eax, %eax
-
-# ifndef USE_AS_RAWMEMCHR
- lea 48(%edi, %eax), %eax
- RETURN
-# else
- lea 48(%edx, %eax), %eax
- ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR
- .p2align 4
-L(exit_loop):
- add $64, %edx
- cmp $32, %edx
- jbe L(exit_loop_32)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
-
- movdqa 16(%edi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa 32(%edi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32_1)
- cmp $48, %edx
- jbe L(return_null)
-
- pcmpeqb 48(%edi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches48_1)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(exit_loop_32):
- movdqa (%edi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches_1)
- cmp $16, %edx
- jbe L(return_null)
-
- pcmpeqb 16(%edi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches16_1)
- xor %eax, %eax
- RETURN
-# endif
- .p2align 4
-L(matches0):
- bsf %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- lea -16(%eax, %edi), %eax
- RETURN
-# else
- lea -16(%eax, %edx), %eax
- ret
-# endif
-
- .p2align 4
-L(matches):
- bsf %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- add %edi, %eax
- RETURN
-# else
- add %edx, %eax
- ret
-# endif
-
- .p2align 4
-L(matches16):
- bsf %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- lea 16(%eax, %edi), %eax
- RETURN
-# else
- lea 16(%eax, %edx), %eax
- ret
-# endif
-
- .p2align 4
-L(matches32):
- bsf %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- lea 32(%eax, %edi), %eax
- RETURN
-# else
- lea 32(%eax, %edx), %eax
- ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR
- .p2align 4
-L(matches_1):
- bsf %eax, %eax
- sub %eax, %edx
- jbe L(return_null)
-
- add %edi, %eax
- RETURN
-
- .p2align 4
-L(matches16_1):
- sub $16, %edx
- bsf %eax, %eax
- sub %eax, %edx
- jbe L(return_null)
-
- lea 16(%edi, %eax), %eax
- RETURN
-
- .p2align 4
-L(matches32_1):
- sub $32, %edx
- bsf %eax, %eax
- sub %eax, %edx
- jbe L(return_null)
-
- lea 32(%edi, %eax), %eax
- RETURN
-
- .p2align 4
-L(matches48_1):
- sub $48, %edx
- bsf %eax, %eax
- sub %eax, %edx
- jbe L(return_null)
-
- lea 48(%edi, %eax), %eax
- RETURN
-# endif
- .p2align 4
-L(return_null):
- xor %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- RETURN
-# else
- ret
-# endif
-
-END (MEMCHR)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2.S b/sysdeps/i386/i686/multiarch/memchr-sse2.S
deleted file mode 100644
index 172d70de13..0000000000
--- a/sysdeps/i386/i686/multiarch/memchr-sse2.S
+++ /dev/null
@@ -1,709 +0,0 @@
-/* Optimized memchr with sse2 without bsf
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef USE_AS_RAWMEMCHR
-# define ENTRANCE PUSH(%edi);
-# define PARMS 8
-# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
-# else
-# define ENTRANCE
-# define PARMS 4
-# endif
-
-# define STR1 PARMS
-# define STR2 STR1+4
-
-# ifndef USE_AS_RAWMEMCHR
-# define LEN STR2+4
-# endif
-
-# ifndef MEMCHR
-# define MEMCHR __memchr_sse2
-# endif
-
- atom_text_section
-ENTRY (MEMCHR)
- ENTRANCE
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
-# ifndef USE_AS_RAWMEMCHR
- mov LEN(%esp), %edx
- test %edx, %edx
- jz L(return_null)
-# endif
-
- punpcklbw %xmm1, %xmm1
-# ifndef USE_AS_RAWMEMCHR
- mov %ecx, %edi
-# else
- mov %ecx, %edx
-# endif
- punpcklbw %xmm1, %xmm1
-
- and $63, %ecx
- pshufd $0, %xmm1, %xmm1
- cmp $48, %ecx
- ja L(crosscache)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqu (%edi), %xmm0
-# else
- movdqu (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- jnz L(match_case2_prolog)
-
- sub $16, %edx
- jbe L(return_null)
- lea 16(%edi), %edi
- and $15, %ecx
- and $-16, %edi
- add %ecx, %edx
-# else
- jnz L(match_case1_prolog)
- lea 16(%edx), %edx
- and $-16, %edx
-# endif
- jmp L(loop_prolog)
-
- .p2align 4
-L(crosscache):
- and $15, %ecx
-# ifndef USE_AS_RAWMEMCHR
- and $-16, %edi
- movdqa (%edi), %xmm0
-# else
- and $-16, %edx
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- sar %cl, %eax
- test %eax, %eax
-
-# ifndef USE_AS_RAWMEMCHR
- jnz L(match_case2_prolog1)
- /* "ecx" is less than 16. Calculate "edx + ecx - 16" by using
- "edx - (16 - ecx)" instead of "(edx + ecx) - 16" to avoid
- possible addition overflow. */
- neg %ecx
- add $16, %ecx
- sub %ecx, %edx
- jbe L(return_null)
- lea 16(%edi), %edi
-# else
- jnz L(match_case1_prolog1)
- lea 16(%edx), %edx
-# endif
-
- .p2align 4
-L(loop_prolog):
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edx
- jbe L(exit_loop)
- movdqa (%edi), %xmm0
-# else
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- xor %ecx, %ecx
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 16(%edi), %xmm2
-# else
- movdqa 16(%edx), %xmm2
-# endif
- pcmpeqb %xmm1, %xmm2
- lea 16(%ecx), %ecx
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- lea 16(%ecx), %ecx
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 48(%edi), %xmm4
-# else
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm4
- lea 16(%ecx), %ecx
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- lea 64(%edi), %edi
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa (%edi), %xmm0
-# else
- lea 64(%edx), %edx
- movdqa (%edx), %xmm0
-# endif
- pcmpeqb %xmm1, %xmm0
- xor %ecx, %ecx
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 16(%edi), %xmm2
-# else
- movdqa 16(%edx), %xmm2
-# endif
- pcmpeqb %xmm1, %xmm2
- lea 16(%ecx), %ecx
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- lea 16(%ecx), %ecx
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 48(%edi), %xmm4
-# else
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm4
- lea 16(%ecx), %ecx
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- lea 64(%edi), %edi
- mov %edi, %ecx
- and $-64, %edi
- and $63, %ecx
- add %ecx, %edx
-# else
- lea 64(%edx), %edx
- and $-64, %edx
-# endif
-
- .p2align 4
-L(align64_loop):
-
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edx
- jbe L(exit_loop)
- movdqa (%edi), %xmm0
- movdqa 16(%edi), %xmm2
- movdqa 32(%edi), %xmm3
- movdqa 48(%edi), %xmm4
-# else
- movdqa (%edx), %xmm0
- movdqa 16(%edx), %xmm2
- movdqa 32(%edx), %xmm3
- movdqa 48(%edx), %xmm4
-# endif
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm0, %xmm3
- pmaxub %xmm2, %xmm4
- pmaxub %xmm3, %xmm4
-# ifndef USE_AS_RAWMEMCHR
- add $64, %edi
-# else
- add $64, %edx
-# endif
- pmovmskb %xmm4, %eax
-
- test %eax, %eax
- jz L(align64_loop)
-
-# ifndef USE_AS_RAWMEMCHR
- sub $64, %edi
-# else
- sub $64, %edx
-# endif
-
- pmovmskb %xmm0, %eax
- xor %ecx, %ecx
- test %eax, %eax
- jnz L(match_case1)
-
- pmovmskb %xmm2, %eax
- lea 16(%ecx), %ecx
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- movdqa 32(%edi), %xmm3
-# else
- movdqa 32(%edx), %xmm3
-# endif
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- lea 16(%ecx), %ecx
- test %eax, %eax
- jnz L(match_case1)
-
-# ifndef USE_AS_RAWMEMCHR
- pcmpeqb 48(%edi), %xmm1
-# else
- pcmpeqb 48(%edx), %xmm1
-# endif
- pmovmskb %xmm1, %eax
- lea 16(%ecx), %ecx
-
- .p2align 4
-L(match_case1):
-# ifndef USE_AS_RAWMEMCHR
- add %ecx, %edi
-# else
-L(match_case1_prolog1):
- add %ecx, %edx
-L(match_case1_prolog):
-# endif
- test %al, %al
- jz L(match_case1_high)
- mov %al, %cl
- and $15, %cl
- jz L(match_case1_8)
- test $0x01, %al
- jnz L(ExitCase1_1)
- test $0x02, %al
- jnz L(ExitCase1_2)
- test $0x04, %al
- jnz L(ExitCase1_3)
-# ifndef USE_AS_RAWMEMCHR
- lea 3(%edi), %eax
- RETURN
-# else
- lea 3(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(match_case1_8):
- test $0x10, %al
- jnz L(ExitCase1_5)
- test $0x20, %al
- jnz L(ExitCase1_6)
- test $0x40, %al
- jnz L(ExitCase1_7)
-# ifndef USE_AS_RAWMEMCHR
- lea 7(%edi), %eax
- RETURN
-# else
- lea 7(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(match_case1_high):
- mov %ah, %ch
- and $15, %ch
- jz L(match_case1_high_8)
- test $0x01, %ah
- jnz L(ExitCase1_9)
- test $0x02, %ah
- jnz L(ExitCase1_10)
- test $0x04, %ah
- jnz L(ExitCase1_11)
-# ifndef USE_AS_RAWMEMCHR
- lea 11(%edi), %eax
- RETURN
-# else
- lea 11(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(match_case1_high_8):
- test $0x10, %ah
- jnz L(ExitCase1_13)
- test $0x20, %ah
- jnz L(ExitCase1_14)
- test $0x40, %ah
- jnz L(ExitCase1_15)
-# ifndef USE_AS_RAWMEMCHR
- lea 15(%edi), %eax
- RETURN
-# else
- lea 15(%edx), %eax
- ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR
- .p2align 4
-L(exit_loop):
- add $64, %edx
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm1, %xmm0
- xor %ecx, %ecx
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(match_case2)
- cmp $16, %edx
- jbe L(return_null)
-
- movdqa 16(%edi), %xmm2
- pcmpeqb %xmm1, %xmm2
- lea 16(%ecx), %ecx
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(match_case2)
- cmp $32, %edx
- jbe L(return_null)
-
- movdqa 32(%edi), %xmm3
- pcmpeqb %xmm1, %xmm3
- lea 16(%ecx), %ecx
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(match_case2)
- cmp $48, %edx
- jbe L(return_null)
-
- pcmpeqb 48(%edi), %xmm1
- lea 16(%ecx), %ecx
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(match_case2)
-
- xor %eax, %eax
- RETURN
-# endif
-
- .p2align 4
-L(ExitCase1_1):
-# ifndef USE_AS_RAWMEMCHR
- mov %edi, %eax
- RETURN
-# else
- mov %edx, %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_2):
-# ifndef USE_AS_RAWMEMCHR
- lea 1(%edi), %eax
- RETURN
-# else
- lea 1(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_3):
-# ifndef USE_AS_RAWMEMCHR
- lea 2(%edi), %eax
- RETURN
-# else
- lea 2(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_5):
-# ifndef USE_AS_RAWMEMCHR
- lea 4(%edi), %eax
- RETURN
-# else
- lea 4(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_6):
-# ifndef USE_AS_RAWMEMCHR
- lea 5(%edi), %eax
- RETURN
-# else
- lea 5(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_7):
-# ifndef USE_AS_RAWMEMCHR
- lea 6(%edi), %eax
- RETURN
-# else
- lea 6(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_9):
-# ifndef USE_AS_RAWMEMCHR
- lea 8(%edi), %eax
- RETURN
-# else
- lea 8(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_10):
-# ifndef USE_AS_RAWMEMCHR
- lea 9(%edi), %eax
- RETURN
-# else
- lea 9(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_11):
-# ifndef USE_AS_RAWMEMCHR
- lea 10(%edi), %eax
- RETURN
-# else
- lea 10(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_13):
-# ifndef USE_AS_RAWMEMCHR
- lea 12(%edi), %eax
- RETURN
-# else
- lea 12(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_14):
-# ifndef USE_AS_RAWMEMCHR
- lea 13(%edi), %eax
- RETURN
-# else
- lea 13(%edx), %eax
- ret
-# endif
-
- .p2align 4
-L(ExitCase1_15):
-# ifndef USE_AS_RAWMEMCHR
- lea 14(%edi), %eax
- RETURN
-# else
- lea 14(%edx), %eax
- ret
-# endif
-
-# ifndef USE_AS_RAWMEMCHR
- .p2align 4
-L(match_case2):
- sub %ecx, %edx
-L(match_case2_prolog1):
- add %ecx, %edi
-L(match_case2_prolog):
- test %al, %al
- jz L(match_case2_high)
- mov %al, %cl
- and $15, %cl
- jz L(match_case2_8)
- test $0x01, %al
- jnz L(ExitCase2_1)
- test $0x02, %al
- jnz L(ExitCase2_2)
- test $0x04, %al
- jnz L(ExitCase2_3)
- sub $4, %edx
- jb L(return_null)
- lea 3(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_case2_8):
- test $0x10, %al
- jnz L(ExitCase2_5)
- test $0x20, %al
- jnz L(ExitCase2_6)
- test $0x40, %al
- jnz L(ExitCase2_7)
- sub $8, %edx
- jb L(return_null)
- lea 7(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_case2_high):
- mov %ah, %ch
- and $15, %ch
- jz L(match_case2_high_8)
- test $0x01, %ah
- jnz L(ExitCase2_9)
- test $0x02, %ah
- jnz L(ExitCase2_10)
- test $0x04, %ah
- jnz L(ExitCase2_11)
- sub $12, %edx
- jb L(return_null)
- lea 11(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_case2_high_8):
- test $0x10, %ah
- jnz L(ExitCase2_13)
- test $0x20, %ah
- jnz L(ExitCase2_14)
- test $0x40, %ah
- jnz L(ExitCase2_15)
- sub $16, %edx
- jb L(return_null)
- lea 15(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_1):
- mov %edi, %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_2):
- sub $2, %edx
- jb L(return_null)
- lea 1(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_3):
- sub $3, %edx
- jb L(return_null)
- lea 2(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_5):
- sub $5, %edx
- jb L(return_null)
- lea 4(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_6):
- sub $6, %edx
- jb L(return_null)
- lea 5(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_7):
- sub $7, %edx
- jb L(return_null)
- lea 6(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_9):
- sub $9, %edx
- jb L(return_null)
- lea 8(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_10):
- sub $10, %edx
- jb L(return_null)
- lea 9(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_11):
- sub $11, %edx
- jb L(return_null)
- lea 10(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_13):
- sub $13, %edx
- jb L(return_null)
- lea 12(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_14):
- sub $14, %edx
- jb L(return_null)
- lea 13(%edi), %eax
- RETURN
-
- .p2align 4
-L(ExitCase2_15):
- sub $15, %edx
- jb L(return_null)
- lea 14(%edi), %eax
- RETURN
-# endif
-
- .p2align 4
-L(return_null):
- xor %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- RETURN
-# else
- ret
-# endif
-
-END (MEMCHR)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memchr.S b/sysdeps/i386/i686/multiarch/memchr.S
deleted file mode 100644
index bd0dace290..0000000000
--- a/sysdeps/i386/i686/multiarch/memchr.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Multiple versions of memchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(__memchr)
- .type __memchr, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- HAS_ARCH_FEATURE (Slow_BSF)
- jz 3f
-
- LOAD_FUNC_GOT_EAX ( __memchr_sse2)
- ret
-
-2: LOAD_FUNC_GOT_EAX (__memchr_ia32)
- ret
-
-3: LOAD_FUNC_GOT_EAX (__memchr_sse2_bsf)
- ret
-END(__memchr)
-
-weak_alias(__memchr, memchr)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __memchr_ia32, @function; \
- .globl __memchr_ia32; \
- .p2align 4; \
- __memchr_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __memchr_ia32, .-__memchr_ia32
-
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memchr; __GI_memchr = __memchr_ia32
-
-#endif
-#include "../../memchr.S"
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
deleted file mode 100644
index 2aa13048b2..0000000000
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ /dev/null
@@ -1,1225 +0,0 @@
-/* memcmp with SSE4.2, wmemcmp with SSE4.2
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# ifndef MEMCMP
-# define MEMCMP __memcmp_sse4_2
-# endif
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 4
-# define BLK1 PARMS
-# define BLK2 BLK1 + 4
-# define LEN BLK2 + 4
-# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
-
-
-# ifdef SHARED
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. INDEX is a register containing the
- index into the jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
-/* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
-/* Get the address of the jump table. */ \
- addl $(TABLE - .), %ebx; \
-/* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ebx,INDEX,SCALE), %ebx; \
-/* EBX now holds the target address; EAX/EDX were adjusted before this macro. Go. */ \
- jmp *%ebx
-# else
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute addresses. INDEX is a register containing the index into
- the jump table. SCALE is the scale of INDEX. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-# endif
-
-
-/* Warning!
- wmemcmp has to use SIGNED comparison for elements.
- memcmp has to use UNSIGNED comparison for elements.
-*/
-
- .section .text.sse4.2,"ax",@progbits
-ENTRY (MEMCMP)
- movl BLK1(%esp), %eax
- movl BLK2(%esp), %edx
- movl LEN(%esp), %ecx
-
-# ifdef USE_AS_WMEMCMP
- shl $2, %ecx
- test %ecx, %ecx
- jz L(return0)
-# else
- cmp $1, %ecx
- jbe L(less1bytes)
-# endif
-
- pxor %xmm0, %xmm0
- cmp $64, %ecx
- ja L(64bytesormore)
- cmp $8, %ecx
-
-# ifndef USE_AS_WMEMCMP
- PUSH (%ebx)
- jb L(less8bytes)
-# else
- jb L(less8bytes)
- PUSH (%ebx)
-# endif
-
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(less8bytes):
- mov (%eax), %bl
- cmpb (%edx), %bl
- jne L(nonzero)
-
- mov 1(%eax), %bl
- cmpb 1(%edx), %bl
- jne L(nonzero)
-
- cmp $2, %ecx
- jz L(0bytes)
-
- mov 2(%eax), %bl
- cmpb 2(%edx), %bl
- jne L(nonzero)
-
- cmp $3, %ecx
- jz L(0bytes)
-
- mov 3(%eax), %bl
- cmpb 3(%edx), %bl
- jne L(nonzero)
-
- cmp $4, %ecx
- jz L(0bytes)
-
- mov 4(%eax), %bl
- cmpb 4(%edx), %bl
- jne L(nonzero)
-
- cmp $5, %ecx
- jz L(0bytes)
-
- mov 5(%eax), %bl
- cmpb 5(%edx), %bl
- jne L(nonzero)
-
- cmp $6, %ecx
- jz L(0bytes)
-
- mov 6(%eax), %bl
- cmpb 6(%edx), %bl
- je L(0bytes)
-
-L(nonzero):
- POP (%ebx)
- mov $1, %eax
- ja L(above)
- neg %eax
-L(above):
- ret
- CFI_PUSH (%ebx)
-# endif
-
- .p2align 4
-L(0bytes):
- POP (%ebx)
- xor %eax, %eax
- ret
-
-# ifdef USE_AS_WMEMCMP
-
-/* for wmemcmp, case N == 1 */
-
- .p2align 4
-L(less8bytes):
- mov (%eax), %ecx
- cmp (%edx), %ecx
- je L(return0)
- mov $1, %eax
- jg L(find_diff_bigger)
- neg %eax
- ret
-
- .p2align 4
-L(find_diff_bigger):
- ret
-
- .p2align 4
-L(return0):
- xor %eax, %eax
- ret
-# endif
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(less1bytes):
- jb L(0bytesend)
- movzbl (%eax), %eax
- movzbl (%edx), %edx
- sub %edx, %eax
- ret
-
- .p2align 4
-L(0bytesend):
- xor %eax, %eax
- ret
-# endif
- .p2align 4
-L(64bytesormore):
- PUSH (%ebx)
- mov %ecx, %ebx
- mov $64, %ecx
- sub $64, %ebx
-L(64bytesormore_loop):
- movdqu (%eax), %xmm1
- movdqu (%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_16diff)
-
- movdqu 16(%eax), %xmm1
- movdqu 16(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_32diff)
-
- movdqu 32(%eax), %xmm1
- movdqu 32(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_48diff)
-
- movdqu 48(%eax), %xmm1
- movdqu 48(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_64diff)
- add %ecx, %eax
- add %ecx, %edx
- sub %ecx, %ebx
- jae L(64bytesormore_loop)
- add %ebx, %ecx
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
-
-# ifdef USE_AS_WMEMCMP
-
-/* This label is needed only to fill table_64bytes. */
-L(unreal_case):
-/* no code here */
-
-# endif
- .p2align 4
-L(find_16diff):
- sub $16, %ecx
-L(find_32diff):
- sub $16, %ecx
-L(find_48diff):
- sub $16, %ecx
-L(find_64diff):
- add %ecx, %edx
- add %ecx, %eax
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(16bytes):
- mov -16(%eax), %ecx
- mov -16(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(12bytes):
- mov -12(%eax), %ecx
- mov -12(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(8bytes):
- mov -8(%eax), %ecx
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(4bytes):
- mov -4(%eax), %ecx
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# else
- .p2align 4
-L(16bytes):
- mov -16(%eax), %ecx
- cmp -16(%edx), %ecx
- jne L(find_diff)
-L(12bytes):
- mov -12(%eax), %ecx
- cmp -12(%edx), %ecx
- jne L(find_diff)
-L(8bytes):
- mov -8(%eax), %ecx
- cmp -8(%edx), %ecx
- jne L(find_diff)
-L(4bytes):
- mov -4(%eax), %ecx
- cmp -4(%edx), %ecx
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# endif
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(49bytes):
- movdqu -49(%eax), %xmm1
- movdqu -49(%edx), %xmm2
- mov $-49, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(33bytes):
- movdqu -33(%eax), %xmm1
- movdqu -33(%edx), %xmm2
- mov $-33, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(17bytes):
- mov -17(%eax), %ecx
- mov -17(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(13bytes):
- mov -13(%eax), %ecx
- mov -13(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(9bytes):
- mov -9(%eax), %ecx
- mov -9(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(5bytes):
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(50bytes):
- mov $-50, %ebx
- movdqu -50(%eax), %xmm1
- movdqu -50(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(34bytes):
- mov $-34, %ebx
- movdqu -34(%eax), %xmm1
- movdqu -34(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(18bytes):
- mov -18(%eax), %ecx
- mov -18(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(14bytes):
- mov -14(%eax), %ecx
- mov -14(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(10bytes):
- mov -10(%eax), %ecx
- mov -10(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(6bytes):
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(2bytes):
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(51bytes):
- mov $-51, %ebx
- movdqu -51(%eax), %xmm1
- movdqu -51(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(35bytes):
- mov $-35, %ebx
- movdqu -35(%eax), %xmm1
- movdqu -35(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(19bytes):
- movl -19(%eax), %ecx
- movl -19(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(15bytes):
- movl -15(%eax), %ecx
- movl -15(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(11bytes):
- movl -11(%eax), %ecx
- movl -11(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(7bytes):
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(3bytes):
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
-L(1bytes):
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
- .p2align 4
-L(52bytes):
- movdqu -52(%eax), %xmm1
- movdqu -52(%edx), %xmm2
- mov $-52, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(36bytes):
- movdqu -36(%eax), %xmm1
- movdqu -36(%edx), %xmm2
- mov $-36, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(20bytes):
- movdqu -20(%eax), %xmm1
- movdqu -20(%edx), %xmm2
- mov $-20, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(53bytes):
- movdqu -53(%eax), %xmm1
- movdqu -53(%edx), %xmm2
- mov $-53, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(37bytes):
- mov $-37, %ebx
- movdqu -37(%eax), %xmm1
- movdqu -37(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(21bytes):
- mov $-21, %ebx
- movdqu -21(%eax), %xmm1
- movdqu -21(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(54bytes):
- movdqu -54(%eax), %xmm1
- movdqu -54(%edx), %xmm2
- mov $-54, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(38bytes):
- mov $-38, %ebx
- movdqu -38(%eax), %xmm1
- movdqu -38(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(22bytes):
- mov $-22, %ebx
- movdqu -22(%eax), %xmm1
- movdqu -22(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(55bytes):
- movdqu -55(%eax), %xmm1
- movdqu -55(%edx), %xmm2
- mov $-55, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(39bytes):
- mov $-39, %ebx
- movdqu -39(%eax), %xmm1
- movdqu -39(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(23bytes):
- mov $-23, %ebx
- movdqu -23(%eax), %xmm1
- movdqu -23(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
- .p2align 4
-L(56bytes):
- movdqu -56(%eax), %xmm1
- movdqu -56(%edx), %xmm2
- mov $-56, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(40bytes):
- mov $-40, %ebx
- movdqu -40(%eax), %xmm1
- movdqu -40(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(24bytes):
- mov $-24, %ebx
- movdqu -24(%eax), %xmm1
- movdqu -24(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -8(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(57bytes):
- movdqu -57(%eax), %xmm1
- movdqu -57(%edx), %xmm2
- mov $-57, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(41bytes):
- mov $-41, %ebx
- movdqu -41(%eax), %xmm1
- movdqu -41(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(25bytes):
- mov $-25, %ebx
- movdqu -25(%eax), %xmm1
- movdqu -25(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -9(%eax), %ecx
- mov -9(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(58bytes):
- movdqu -58(%eax), %xmm1
- movdqu -58(%edx), %xmm2
- mov $-58, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(42bytes):
- mov $-42, %ebx
- movdqu -42(%eax), %xmm1
- movdqu -42(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(26bytes):
- mov $-26, %ebx
- movdqu -26(%eax), %xmm1
- movdqu -26(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -10(%eax), %ecx
- mov -10(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(59bytes):
- movdqu -59(%eax), %xmm1
- movdqu -59(%edx), %xmm2
- mov $-59, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(43bytes):
- mov $-43, %ebx
- movdqu -43(%eax), %xmm1
- movdqu -43(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(27bytes):
- mov $-27, %ebx
- movdqu -27(%eax), %xmm1
- movdqu -27(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- movl -11(%eax), %ecx
- movl -11(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
- .p2align 4
-L(60bytes):
- movdqu -60(%eax), %xmm1
- movdqu -60(%edx), %xmm2
- mov $-60, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(44bytes):
- mov $-44, %ebx
- movdqu -44(%eax), %xmm1
- movdqu -44(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(28bytes):
- mov $-28, %ebx
- movdqu -28(%eax), %xmm1
- movdqu -28(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -12(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -12(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -12(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -8(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(61bytes):
- movdqu -61(%eax), %xmm1
- movdqu -61(%edx), %xmm2
- mov $-61, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(45bytes):
- mov $-45, %ebx
- movdqu -45(%eax), %xmm1
- movdqu -45(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(29bytes):
- mov $-29, %ebx
- movdqu -29(%eax), %xmm1
- movdqu -29(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -13(%eax), %ecx
- mov -13(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov -9(%eax), %ecx
- mov -9(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(62bytes):
- movdqu -62(%eax), %xmm1
- movdqu -62(%edx), %xmm2
- mov $-62, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(46bytes):
- mov $-46, %ebx
- movdqu -46(%eax), %xmm1
- movdqu -46(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(30bytes):
- mov $-30, %ebx
- movdqu -30(%eax), %xmm1
- movdqu -30(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -14(%eax), %ecx
- mov -14(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- mov -10(%eax), %ecx
- mov -10(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
- .p2align 4
-L(63bytes):
- movdqu -63(%eax), %xmm1
- movdqu -63(%edx), %xmm2
- mov $-63, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(47bytes):
- mov $-47, %ebx
- movdqu -47(%eax), %xmm1
- movdqu -47(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(31bytes):
- mov $-31, %ebx
- movdqu -31(%eax), %xmm1
- movdqu -31(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- movl -15(%eax), %ecx
- movl -15(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movl -11(%eax), %ecx
- movl -11(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
-
- .p2align 4
-L(64bytes):
- movdqu -64(%eax), %xmm1
- movdqu -64(%edx), %xmm2
- mov $-64, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(48bytes):
- movdqu -48(%eax), %xmm1
- movdqu -48(%edx), %xmm2
- mov $-48, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(32bytes):
- movdqu -32(%eax), %xmm1
- movdqu -32(%edx), %xmm2
- mov $-32, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -16(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -16(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -16(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -12(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -12(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -12(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -8(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(less16bytes):
- add %ebx, %eax
- add %ebx, %edx
-
- mov (%eax), %ecx
- mov (%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov 4(%eax), %ecx
- mov 4(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov 8(%eax), %ecx
- mov 8(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov 12(%eax), %ecx
- mov 12(%edx), %ebx
- cmp %ebx, %ecx
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# else
- .p2align 4
-L(less16bytes):
- add %ebx, %eax
- add %ebx, %edx
-
- mov (%eax), %ecx
- cmp (%edx), %ecx
- jne L(find_diff)
-
- mov 4(%eax), %ecx
- cmp 4(%edx), %ecx
- jne L(find_diff)
-
- mov 8(%eax), %ecx
- cmp 8(%edx), %ecx
- jne L(find_diff)
-
- mov 12(%eax), %ecx
- cmp 12(%edx), %ecx
-
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# endif
-
- .p2align 4
-L(find_diff):
-# ifndef USE_AS_WMEMCMP
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
- shr $16,%ecx
- shr $16,%ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bx, %cx
-L(end):
- POP (%ebx)
- mov $1, %eax
- ja L(bigger)
- neg %eax
-L(bigger):
- ret
-# else
- POP (%ebx)
- mov $1, %eax
- jg L(bigger)
- neg %eax
- ret
-
- .p2align 4
-L(bigger):
- ret
-# endif
-END (MEMCMP)
-
- .section .rodata.sse4.2,"a",@progbits
- .p2align 2
- .type L(table_64bytes), @object
-# ifndef USE_AS_WMEMCMP
-L(table_64bytes):
- .int JMPTBL (L(0bytes), L(table_64bytes))
- .int JMPTBL (L(1bytes), L(table_64bytes))
- .int JMPTBL (L(2bytes), L(table_64bytes))
- .int JMPTBL (L(3bytes), L(table_64bytes))
- .int JMPTBL (L(4bytes), L(table_64bytes))
- .int JMPTBL (L(5bytes), L(table_64bytes))
- .int JMPTBL (L(6bytes), L(table_64bytes))
- .int JMPTBL (L(7bytes), L(table_64bytes))
- .int JMPTBL (L(8bytes), L(table_64bytes))
- .int JMPTBL (L(9bytes), L(table_64bytes))
- .int JMPTBL (L(10bytes), L(table_64bytes))
- .int JMPTBL (L(11bytes), L(table_64bytes))
- .int JMPTBL (L(12bytes), L(table_64bytes))
- .int JMPTBL (L(13bytes), L(table_64bytes))
- .int JMPTBL (L(14bytes), L(table_64bytes))
- .int JMPTBL (L(15bytes), L(table_64bytes))
- .int JMPTBL (L(16bytes), L(table_64bytes))
- .int JMPTBL (L(17bytes), L(table_64bytes))
- .int JMPTBL (L(18bytes), L(table_64bytes))
- .int JMPTBL (L(19bytes), L(table_64bytes))
- .int JMPTBL (L(20bytes), L(table_64bytes))
- .int JMPTBL (L(21bytes), L(table_64bytes))
- .int JMPTBL (L(22bytes), L(table_64bytes))
- .int JMPTBL (L(23bytes), L(table_64bytes))
- .int JMPTBL (L(24bytes), L(table_64bytes))
- .int JMPTBL (L(25bytes), L(table_64bytes))
- .int JMPTBL (L(26bytes), L(table_64bytes))
- .int JMPTBL (L(27bytes), L(table_64bytes))
- .int JMPTBL (L(28bytes), L(table_64bytes))
- .int JMPTBL (L(29bytes), L(table_64bytes))
- .int JMPTBL (L(30bytes), L(table_64bytes))
- .int JMPTBL (L(31bytes), L(table_64bytes))
- .int JMPTBL (L(32bytes), L(table_64bytes))
- .int JMPTBL (L(33bytes), L(table_64bytes))
- .int JMPTBL (L(34bytes), L(table_64bytes))
- .int JMPTBL (L(35bytes), L(table_64bytes))
- .int JMPTBL (L(36bytes), L(table_64bytes))
- .int JMPTBL (L(37bytes), L(table_64bytes))
- .int JMPTBL (L(38bytes), L(table_64bytes))
- .int JMPTBL (L(39bytes), L(table_64bytes))
- .int JMPTBL (L(40bytes), L(table_64bytes))
- .int JMPTBL (L(41bytes), L(table_64bytes))
- .int JMPTBL (L(42bytes), L(table_64bytes))
- .int JMPTBL (L(43bytes), L(table_64bytes))
- .int JMPTBL (L(44bytes), L(table_64bytes))
- .int JMPTBL (L(45bytes), L(table_64bytes))
- .int JMPTBL (L(46bytes), L(table_64bytes))
- .int JMPTBL (L(47bytes), L(table_64bytes))
- .int JMPTBL (L(48bytes), L(table_64bytes))
- .int JMPTBL (L(49bytes), L(table_64bytes))
- .int JMPTBL (L(50bytes), L(table_64bytes))
- .int JMPTBL (L(51bytes), L(table_64bytes))
- .int JMPTBL (L(52bytes), L(table_64bytes))
- .int JMPTBL (L(53bytes), L(table_64bytes))
- .int JMPTBL (L(54bytes), L(table_64bytes))
- .int JMPTBL (L(55bytes), L(table_64bytes))
- .int JMPTBL (L(56bytes), L(table_64bytes))
- .int JMPTBL (L(57bytes), L(table_64bytes))
- .int JMPTBL (L(58bytes), L(table_64bytes))
- .int JMPTBL (L(59bytes), L(table_64bytes))
- .int JMPTBL (L(60bytes), L(table_64bytes))
- .int JMPTBL (L(61bytes), L(table_64bytes))
- .int JMPTBL (L(62bytes), L(table_64bytes))
- .int JMPTBL (L(63bytes), L(table_64bytes))
- .int JMPTBL (L(64bytes), L(table_64bytes))
-# else
-L(table_64bytes):
- .int JMPTBL (L(0bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(4bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(8bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(12bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(16bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(20bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(24bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(28bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(32bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(36bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(40bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(44bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(48bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(52bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(56bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(60bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(64bytes), L(table_64bytes))
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
deleted file mode 100644
index 5ebf5a4d73..0000000000
--- a/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
+++ /dev/null
@@ -1,2157 +0,0 @@
-/* memcmp with SSSE3, wmemcmp with SSSE3
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# ifndef MEMCMP
-# define MEMCMP __memcmp_ssse3
-# endif
-
-# define CFI_PUSH(REG) /* unwind info for a 4-byte push of REG */ \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-/* CFI_POP undoes CFI_PUSH: CFA offset shrinks by 4 and REG is marked restored. */
-# define CFI_POP(REG) /* unwind info for a 4-byte pop of REG */ \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-/* PUSH/POP pair the real stack instruction with its matching CFI note. */
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-/* Argument offsets from %esp, valid at entry before any PUSH. */
-# define PARMS 4 /* offset of the first argument */
-# define BLK1 PARMS /* first block pointer (loaded into %eax) */
-# define BLK2 BLK1+4 /* second block pointer (loaded into %edx) */
-# define LEN BLK2+4 /* length (loaded into %ecx) */
-# define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret /* pop the three saved registers and return */
-# define RETURN RETURN_END; cfi_restore_state; cfi_remember_state /* as RETURN_END, then reinstate the remembered CFI state for the code that follows */
-
-/* Warning!
- wmemcmp has to use SIGNED comparison for elements.
- memcmp has to use UNSIGNED comparison for elements.
-*/
-
- atom_text_section
-ENTRY (MEMCMP)
- movl LEN(%esp), %ecx
-
-# ifdef USE_AS_WMEMCMP
- shl $2, %ecx
- test %ecx, %ecx
- jz L(zero)
-# endif
-
- movl BLK1(%esp), %eax
- cmp $48, %ecx
- movl BLK2(%esp), %edx
- jae L(48bytesormore)
-
-# ifndef USE_AS_WMEMCMP
- cmp $1, %ecx
- jbe L(less1bytes)
-# endif
-
- PUSH (%ebx)
- add %ecx, %edx
- add %ecx, %eax
- jmp L(less48bytes)
-
- CFI_POP (%ebx)
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(less1bytes):
- jb L(zero)
- movb (%eax), %cl
- cmp (%edx), %cl
- je L(zero)
- mov $1, %eax
- ja L(1bytesend)
- neg %eax
-L(1bytesend):
- ret
-# endif
-
- .p2align 4
-L(zero):
- xor %eax, %eax
- ret
-
- .p2align 4
-L(48bytesormore):
- PUSH (%ebx)
- PUSH (%esi)
- PUSH (%edi)
- cfi_remember_state
- movdqu (%eax), %xmm3
- movdqu (%edx), %xmm0
- movl %eax, %edi
- movl %edx, %esi
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
- lea 16(%edi), %edi
-
- sub $0xffff, %edx
- lea 16(%esi), %esi
- jnz L(less16bytes)
- mov %edi, %edx
- and $0xf, %edx
- xor %edx, %edi
- sub %edx, %esi
- add %edx, %ecx
- mov %esi, %edx
- and $0xf, %edx
- jz L(shr_0)
- xor %edx, %esi
-
-# ifndef USE_AS_WMEMCMP
- cmp $8, %edx
- jae L(next_unaligned_table)
- cmp $0, %edx
- je L(shr_0)
- cmp $1, %edx
- je L(shr_1)
- cmp $2, %edx
- je L(shr_2)
- cmp $3, %edx
- je L(shr_3)
- cmp $4, %edx
- je L(shr_4)
- cmp $5, %edx
- je L(shr_5)
- cmp $6, %edx
- je L(shr_6)
- jmp L(shr_7)
-
- .p2align 2
-L(next_unaligned_table):
- cmp $8, %edx
- je L(shr_8)
- cmp $9, %edx
- je L(shr_9)
- cmp $10, %edx
- je L(shr_10)
- cmp $11, %edx
- je L(shr_11)
- cmp $12, %edx
- je L(shr_12)
- cmp $13, %edx
- je L(shr_13)
- cmp $14, %edx
- je L(shr_14)
- jmp L(shr_15)
-# else
- cmp $0, %edx
- je L(shr_0)
- cmp $4, %edx
- je L(shr_4)
- cmp $8, %edx
- je L(shr_8)
- jmp L(shr_12)
-# endif
-
- .p2align 4
-L(shr_0):
- cmp $80, %ecx
- jae L(shr_0_gobble)
- lea -48(%ecx), %ecx
- xor %eax, %eax
- movaps (%esi), %xmm1
- pcmpeqb (%edi), %xmm1
- movaps 16(%esi), %xmm2
- pcmpeqb 16(%edi), %xmm2
- pand %xmm1, %xmm2
- pmovmskb %xmm2, %edx
- add $32, %edi
- add $32, %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea (%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_0_gobble):
- lea -48(%ecx), %ecx
- movdqa (%esi), %xmm0
- xor %eax, %eax
- pcmpeqb (%edi), %xmm0
- sub $32, %ecx
- movdqa 16(%esi), %xmm2
- pcmpeqb 16(%edi), %xmm2
-L(shr_0_gobble_loop):
- pand %xmm0, %xmm2
- sub $32, %ecx
- pmovmskb %xmm2, %edx
- movdqa %xmm0, %xmm1
- movdqa 32(%esi), %xmm0
- movdqa 48(%esi), %xmm2
- sbb $0xffff, %edx
- pcmpeqb 32(%edi), %xmm0
- pcmpeqb 48(%edi), %xmm2
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- jz L(shr_0_gobble_loop)
-
- pand %xmm0, %xmm2
- cmp $0, %ecx
- jge L(shr_0_gobble_loop_next)
- inc %edx
- add $32, %ecx
-L(shr_0_gobble_loop_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm2, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea (%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_1):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_1_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $1,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $1,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 1(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_1_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $1,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $1,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_1_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $1,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $1,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_1_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_1_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_1_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 1(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_2):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_2_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $2,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $2,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 2(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_2_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $2,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $2,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_2_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $2,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $2,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_2_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_2_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_2_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 2(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_3):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_3_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $3,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $3,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 3(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_3_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $3,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $3,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_3_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $3,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $3,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_3_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_3_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_3_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 3(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-# endif
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_4):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_4_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $4,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $4,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 4(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_4_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $4,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $4,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_4_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $4,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $4,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_4_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_4_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_4_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 4(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_5):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_5_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $5,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $5,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 5(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_5_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $5,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $5,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_5_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $5,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $5,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_5_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_5_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_5_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 5(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_6):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_6_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $6,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $6,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 6(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_6_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $6,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $6,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_6_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $6,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $6,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_6_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_6_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_6_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 6(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_7):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_7_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $7,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $7,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 7(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_7_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $7,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $7,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_7_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $7,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $7,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_7_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_7_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_7_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 7(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-# endif
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_8):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_8_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $8,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $8,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 8(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_8_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $8,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $8,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_8_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $8,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $8,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_8_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_8_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_8_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 8(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_9):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_9_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $9,(%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $9,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 9(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_9_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $9,(%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $9,16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_9_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $9,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $9,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_9_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_9_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_9_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 9(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_10):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_10_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $10, (%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $10,%xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 10(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_10_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $10, (%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $10, 16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_10_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $10,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $10,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_10_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_10_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_10_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 10(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_11):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_11_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $11, (%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $11, %xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 11(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_11_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $11, (%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $11, 16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_11_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $11,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $11,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_11_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_11_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_11_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 11(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-# endif
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_12):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_12_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $12, (%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $12, %xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 12(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_12_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $12, (%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $12, 16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_12_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $12,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $12,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_12_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_12_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_12_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 12(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
-# ifndef USE_AS_WMEMCMP
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_13):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_13_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $13, (%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $13, %xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 13(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_13_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $13, (%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $13, 16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_13_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $13,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $13,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_13_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_13_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_13_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 13(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_14):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_14_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $14, (%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $14, %xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 14(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_14_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $14, (%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $14, 16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_14_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $14,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $14,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_14_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_14_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_14_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 14(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_15):
- cmp $80, %ecx
- lea -48(%ecx), %ecx
- mov %edx, %eax
- jae L(shr_15_gobble)
-
- movdqa 16(%esi), %xmm1
- movdqa %xmm1, %xmm2
- palignr $15, (%esi), %xmm1
- pcmpeqb (%edi), %xmm1
-
- movdqa 32(%esi), %xmm3
- palignr $15, %xmm2, %xmm3
- pcmpeqb 16(%edi), %xmm3
-
- pand %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
- lea (%ecx, %edi,1), %eax
- lea 15(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(shr_15_gobble):
- sub $32, %ecx
- movdqa 16(%esi), %xmm0
- palignr $15, (%esi), %xmm0
- pcmpeqb (%edi), %xmm0
-
- movdqa 32(%esi), %xmm3
- palignr $15, 16(%esi), %xmm3
- pcmpeqb 16(%edi), %xmm3
-
-L(shr_15_gobble_loop):
- pand %xmm0, %xmm3
- sub $32, %ecx
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
-
- movdqa 64(%esi), %xmm3
- palignr $15,48(%esi), %xmm3
- sbb $0xffff, %edx
- movdqa 48(%esi), %xmm0
- palignr $15,32(%esi), %xmm0
- pcmpeqb 32(%edi), %xmm0
- lea 32(%esi), %esi
- pcmpeqb 48(%edi), %xmm3
-
- lea 32(%edi), %edi
- jz L(shr_15_gobble_loop)
- pand %xmm0, %xmm3
-
- cmp $0, %ecx
- jge L(shr_15_gobble_next)
- inc %edx
- add $32, %ecx
-L(shr_15_gobble_next):
- test %edx, %edx
- jnz L(exit)
-
- pmovmskb %xmm3, %edx
- movdqa %xmm0, %xmm1
- lea 32(%edi), %edi
- lea 32(%esi), %esi
- sub $0xffff, %edx
- jnz L(exit)
-
- lea (%ecx, %edi,1), %eax
- lea 15(%ecx, %esi,1), %edx
- POP (%edi)
- POP (%esi)
- jmp L(less48bytes)
-# endif
-
- cfi_restore_state
- cfi_remember_state
- .p2align 4
-L(exit):
- pmovmskb %xmm1, %ebx
- sub $0xffff, %ebx
- jz L(first16bytes)
- lea -16(%esi), %esi
- lea -16(%edi), %edi
- mov %ebx, %edx
-
-L(first16bytes):
- add %eax, %esi
-L(less16bytes):
-
-# ifndef USE_AS_WMEMCMP
- test %dl, %dl
- jz L(next_24_bytes)
-
- test $0x01, %dl
- jnz L(Byte16)
-
- test $0x02, %dl
- jnz L(Byte17)
-
- test $0x04, %dl
- jnz L(Byte18)
-
- test $0x08, %dl
- jnz L(Byte19)
-
- test $0x10, %dl
- jnz L(Byte20)
-
- test $0x20, %dl
- jnz L(Byte21)
-
- test $0x40, %dl
- jnz L(Byte22)
-L(Byte23):
- movzbl -9(%edi), %eax
- movzbl -9(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(Byte16):
- movzbl -16(%edi), %eax
- movzbl -16(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(Byte17):
- movzbl -15(%edi), %eax
- movzbl -15(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(Byte18):
- movzbl -14(%edi), %eax
- movzbl -14(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(Byte19):
- movzbl -13(%edi), %eax
- movzbl -13(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(Byte20):
- movzbl -12(%edi), %eax
- movzbl -12(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(Byte21):
- movzbl -11(%edi), %eax
- movzbl -11(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(Byte22):
- movzbl -10(%edi), %eax
- movzbl -10(%esi), %edx
- sub %edx, %eax
- RETURN
-
- .p2align 4
-L(next_24_bytes):
- lea 8(%edi), %edi
- lea 8(%esi), %esi
- test $0x01, %dh
- jnz L(Byte16)
-
- test $0x02, %dh
- jnz L(Byte17)
-
- test $0x04, %dh
- jnz L(Byte18)
-
- test $0x08, %dh
- jnz L(Byte19)
-
- test $0x10, %dh
- jnz L(Byte20)
-
- test $0x20, %dh
- jnz L(Byte21)
-
- test $0x40, %dh
- jnz L(Byte22)
-
- .p2align 4
-L(Byte31):
- movzbl -9(%edi), %eax
- movzbl -9(%esi), %edx
- sub %edx, %eax
- RETURN_END
-# else
-
-/* special for wmemcmp */
- xor %eax, %eax
- test %dl, %dl
- jz L(next_two_double_words)
- and $15, %dl
- jz L(second_double_word)
- mov -16(%edi), %eax
- cmp -16(%esi), %eax
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(second_double_word):
- mov -12(%edi), %eax
- cmp -12(%esi), %eax
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(next_two_double_words):
- and $15, %dh
- jz L(fourth_double_word)
- mov -8(%edi), %eax
- cmp -8(%esi), %eax
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(fourth_double_word):
- mov -4(%edi), %eax
- cmp -4(%esi), %eax
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(nequal):
- mov $1, %eax
- jg L(nequal_bigger)
- neg %eax
- RETURN
-
- .p2align 4
-L(nequal_bigger):
- RETURN_END
-# endif
-
- CFI_PUSH (%ebx)
-
- .p2align 4
-L(more8bytes):
- cmp $16, %ecx
- jae L(more16bytes)
- cmp $8, %ecx
- je L(8bytes)
-# ifndef USE_AS_WMEMCMP
- cmp $9, %ecx
- je L(9bytes)
- cmp $10, %ecx
- je L(10bytes)
- cmp $11, %ecx
- je L(11bytes)
- cmp $12, %ecx
- je L(12bytes)
- cmp $13, %ecx
- je L(13bytes)
- cmp $14, %ecx
- je L(14bytes)
- jmp L(15bytes)
-# else
- jmp L(12bytes)
-# endif
-
- .p2align 4
-L(more16bytes):
- cmp $24, %ecx
- jae L(more24bytes)
- cmp $16, %ecx
- je L(16bytes)
-# ifndef USE_AS_WMEMCMP
- cmp $17, %ecx
- je L(17bytes)
- cmp $18, %ecx
- je L(18bytes)
- cmp $19, %ecx
- je L(19bytes)
- cmp $20, %ecx
- je L(20bytes)
- cmp $21, %ecx
- je L(21bytes)
- cmp $22, %ecx
- je L(22bytes)
- jmp L(23bytes)
-# else
- jmp L(20bytes)
-# endif
-
- .p2align 4
-L(more24bytes):
- cmp $32, %ecx
- jae L(more32bytes)
- cmp $24, %ecx
- je L(24bytes)
-# ifndef USE_AS_WMEMCMP
- cmp $25, %ecx
- je L(25bytes)
- cmp $26, %ecx
- je L(26bytes)
- cmp $27, %ecx
- je L(27bytes)
- cmp $28, %ecx
- je L(28bytes)
- cmp $29, %ecx
- je L(29bytes)
- cmp $30, %ecx
- je L(30bytes)
- jmp L(31bytes)
-# else
- jmp L(28bytes)
-# endif
-
- .p2align 4
-L(more32bytes):
- cmp $40, %ecx
- jae L(more40bytes)
- cmp $32, %ecx
- je L(32bytes)
-# ifndef USE_AS_WMEMCMP
- cmp $33, %ecx
- je L(33bytes)
- cmp $34, %ecx
- je L(34bytes)
- cmp $35, %ecx
- je L(35bytes)
- cmp $36, %ecx
- je L(36bytes)
- cmp $37, %ecx
- je L(37bytes)
- cmp $38, %ecx
- je L(38bytes)
- jmp L(39bytes)
-# else
- jmp L(36bytes)
-# endif
-
- .p2align 4
-L(less48bytes):
- cmp $8, %ecx
- jae L(more8bytes)
-# ifndef USE_AS_WMEMCMP
- cmp $2, %ecx
- je L(2bytes)
- cmp $3, %ecx
- je L(3bytes)
- cmp $4, %ecx
- je L(4bytes)
- cmp $5, %ecx
- je L(5bytes)
- cmp $6, %ecx
- je L(6bytes)
- jmp L(7bytes)
-# else
- jmp L(4bytes)
-# endif
-
- .p2align 4
-L(more40bytes):
- cmp $40, %ecx
- je L(40bytes)
-# ifndef USE_AS_WMEMCMP
- cmp $41, %ecx
- je L(41bytes)
- cmp $42, %ecx
- je L(42bytes)
- cmp $43, %ecx
- je L(43bytes)
- cmp $44, %ecx
- je L(44bytes)
- cmp $45, %ecx
- je L(45bytes)
- cmp $46, %ecx
- je L(46bytes)
- jmp L(47bytes)
-
- .p2align 4
-L(44bytes):
- mov -44(%eax), %ecx
- mov -44(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(40bytes):
- mov -40(%eax), %ecx
- mov -40(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(36bytes):
- mov -36(%eax), %ecx
- mov -36(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(32bytes):
- mov -32(%eax), %ecx
- mov -32(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(28bytes):
- mov -28(%eax), %ecx
- mov -28(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(24bytes):
- mov -24(%eax), %ecx
- mov -24(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(20bytes):
- mov -20(%eax), %ecx
- mov -20(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(16bytes):
- mov -16(%eax), %ecx
- mov -16(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(12bytes):
- mov -12(%eax), %ecx
- mov -12(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(8bytes):
- mov -8(%eax), %ecx
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(4bytes):
- mov -4(%eax), %ecx
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
- mov $0, %eax
- jne L(find_diff)
- POP (%ebx)
- ret
- CFI_PUSH (%ebx)
-# else
- .p2align 4
-L(44bytes):
- mov -44(%eax), %ecx
- cmp -44(%edx), %ecx
- jne L(find_diff)
-L(40bytes):
- mov -40(%eax), %ecx
- cmp -40(%edx), %ecx
- jne L(find_diff)
-L(36bytes):
- mov -36(%eax), %ecx
- cmp -36(%edx), %ecx
- jne L(find_diff)
-L(32bytes):
- mov -32(%eax), %ecx
- cmp -32(%edx), %ecx
- jne L(find_diff)
-L(28bytes):
- mov -28(%eax), %ecx
- cmp -28(%edx), %ecx
- jne L(find_diff)
-L(24bytes):
- mov -24(%eax), %ecx
- cmp -24(%edx), %ecx
- jne L(find_diff)
-L(20bytes):
- mov -20(%eax), %ecx
- cmp -20(%edx), %ecx
- jne L(find_diff)
-L(16bytes):
- mov -16(%eax), %ecx
- cmp -16(%edx), %ecx
- jne L(find_diff)
-L(12bytes):
- mov -12(%eax), %ecx
- cmp -12(%edx), %ecx
- jne L(find_diff)
-L(8bytes):
- mov -8(%eax), %ecx
- cmp -8(%edx), %ecx
- jne L(find_diff)
-L(4bytes):
- mov -4(%eax), %ecx
- xor %eax, %eax
- cmp -4(%edx), %ecx
- jne L(find_diff)
- POP (%ebx)
- ret
- CFI_PUSH (%ebx)
-# endif
-
-# ifndef USE_AS_WMEMCMP
-
- .p2align 4
-L(45bytes):
- mov -45(%eax), %ecx
- mov -45(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(41bytes):
- mov -41(%eax), %ecx
- mov -41(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(37bytes):
- mov -37(%eax), %ecx
- mov -37(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(33bytes):
- mov -33(%eax), %ecx
- mov -33(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(29bytes):
- mov -29(%eax), %ecx
- mov -29(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(25bytes):
- mov -25(%eax), %ecx
- mov -25(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(21bytes):
- mov -21(%eax), %ecx
- mov -21(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(17bytes):
- mov -17(%eax), %ecx
- mov -17(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(13bytes):
- mov -13(%eax), %ecx
- mov -13(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(9bytes):
- mov -9(%eax), %ecx
- mov -9(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(5bytes):
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- POP (%ebx)
- ret
- CFI_PUSH (%ebx)
-
- .p2align 4
-L(46bytes):
- mov -46(%eax), %ecx
- mov -46(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(42bytes):
- mov -42(%eax), %ecx
- mov -42(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(38bytes):
- mov -38(%eax), %ecx
- mov -38(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(34bytes):
- mov -34(%eax), %ecx
- mov -34(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(30bytes):
- mov -30(%eax), %ecx
- mov -30(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(26bytes):
- mov -26(%eax), %ecx
- mov -26(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(22bytes):
- mov -22(%eax), %ecx
- mov -22(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(18bytes):
- mov -18(%eax), %ecx
- mov -18(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(14bytes):
- mov -14(%eax), %ecx
- mov -14(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(10bytes):
- mov -10(%eax), %ecx
- mov -10(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(6bytes):
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(2bytes):
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- POP (%ebx)
- ret
- CFI_PUSH (%ebx)
-
- .p2align 4
-L(47bytes):
- movl -47(%eax), %ecx
- movl -47(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(43bytes):
- movl -43(%eax), %ecx
- movl -43(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(39bytes):
- movl -39(%eax), %ecx
- movl -39(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(35bytes):
- movl -35(%eax), %ecx
- movl -35(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(31bytes):
- movl -31(%eax), %ecx
- movl -31(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(27bytes):
- movl -27(%eax), %ecx
- movl -27(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(23bytes):
- movl -23(%eax), %ecx
- movl -23(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(19bytes):
- movl -19(%eax), %ecx
- movl -19(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(15bytes):
- movl -15(%eax), %ecx
- movl -15(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(11bytes):
- movl -11(%eax), %ecx
- movl -11(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(7bytes):
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(3bytes):
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- POP (%ebx)
- ret
- CFI_PUSH (%ebx)
-
- .p2align 4
-L(find_diff):
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
- shr $16,%ecx
- shr $16,%ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bx, %cx
-
- .p2align 4
-L(end):
- POP (%ebx)
- mov $1, %eax
- ja L(bigger)
- neg %eax
-L(bigger):
- ret
-# else
-
-/* for wmemcmp */
- .p2align 4
-L(find_diff):
- POP (%ebx)
- mov $1, %eax
- jg L(find_diff_bigger)
- neg %eax
- ret
-
- .p2align 4
-L(find_diff_bigger):
- ret
-
-# endif
-END (MEMCMP)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcmp.S b/sysdeps/i386/i686/multiarch/memcmp.S
deleted file mode 100644
index 1fc5994a17..0000000000
--- a/sysdeps/i386/i686/multiarch/memcmp.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Multiple versions of memcmp
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(memcmp) /* IFUNC resolver: picks the memcmp variant and hands it back in EAX. */
- .type memcmp, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__memcmp_ia32) /* Start with the baseline IA-32 version. */
- HAS_CPU_FEATURE (SSSE3) /* Presumably leaves ZF set when the feature is missing (macro comes from init-arch.h; confirm). */
- jz 2f /* Taken: keep __memcmp_ia32. */
- LOAD_FUNC_GOT_EAX (__memcmp_ssse3) /* SSSE3 present: prefer the SSSE3 version. */
- HAS_CPU_FEATURE (SSE4_2) /* Only tested once SSSE3 has been found. */
- jz 2f /* Taken: keep __memcmp_ssse3. */
- LOAD_FUNC_GOT_EAX (__memcmp_sse4_2) /* Both features present: use the SSE4.2 version. */
-2: ret
-END(memcmp)
-
-# undef ENTRY /* From here on ENTRY ignores its argument and always opens __memcmp_ia32, the name the "../memcmp.S" included below is assembled under. */
-# define ENTRY(name) \
- .type __memcmp_ia32, @function; \
- .p2align 4; \
- .globl __memcmp_ia32; \
- .hidden __memcmp_ia32; \
- __memcmp_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END /* Matching END: closes the CFI region and records the size of __memcmp_ia32. */
-# define END(name) \
- cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* IFUNC does not work for hidden functions in a shared library: they
- are called without first setting up EBX, which the PLT used by IFUNC
- requires. So bind the internal alias directly to the IA-32 version. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memcmp; __GI_memcmp = __memcmp_ia32
-# endif
-#endif
-
-#include "../memcmp.S"
diff --git a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
deleted file mode 100644
index 2fe2072cb1..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
+++ /dev/null
@@ -1,681 +0,0 @@
-/* memcpy optimized with SSE2 unaligned memory access instructions.
- Copyright (C) 2014-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
-
-# include <sysdep.h>
-# include "asm-syntax.h"
-
-# ifndef MEMCPY /* Default symbol names, used only when MEMCPY has not been defined already. */
-# define MEMCPY __memcpy_sse2_unaligned
-# define MEMCPY_CHK __memcpy_chk_sse2_unaligned
-# endif
-/* SRC, DEST and LEN are offsets from ESP, valid once ENTRANCE has run. */
-# ifdef USE_AS_BCOPY /* In the bcopy build the source is the first stack argument. */
-# define SRC PARMS
-# define DEST SRC+4
-# define LEN DEST+4
-# else /* Otherwise the destination is the first stack argument. */
-# define DEST PARMS
-# define SRC DEST+4
-# define LEN SRC+4
-# endif
-/* CFI_PUSH: unwind annotations for a 4-byte push of REG (CFA moves by 4, REG saved at offset 0). */
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-/* CFI_POP: unwind annotations for the matching pop (CFA moves back, REG restored). */
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-/* PUSH/POP: the instruction together with its unwind annotations. */
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-/* RETURN is for returns in the middle of a function: after the ret it re-asserts, for the code that follows, that EBX is still saved. */
-# define PARMS 8 /* Preserve EBX: skip the return address and the EBX slot pushed by ENTRANCE. */
-# define ENTRANCE PUSH (%ebx);
-# define RETURN_END POP (%ebx); ret
-# define RETURN RETURN_END; CFI_PUSH (%ebx)
-
- .section .text.sse2,"ax",@progbits
-# if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK) /* Checked entry point: validate the sizes, then continue into MEMCPY. */
- movl 12(%esp), %eax /* Third stack argument (presumably the copy length; confirm against the *_chk prototype). */
- cmpl %eax, 16(%esp) /* Compare the fourth stack argument against it. */
- jb HIDDEN_JUMPTARGET (__chk_fail) /* Fourth < third (unsigned): jump to __chk_fail. */
-END (MEMCPY_CHK) /* No ret here: execution continues into the MEMCPY entry assembled right after. */
-# endif
-
-ENTRY (MEMCPY) /* Copy LEN bytes from SRC to DEST with unaligned SSE2 accesses; EAX returns DEST (DEST + LEN for mempcpy). */
- ENTRANCE
- movl LEN(%esp), %ecx /* ECX = byte count. */
- movl SRC(%esp), %eax /* EAX = source. */
- movl DEST(%esp), %edx /* EDX = destination. */
- cmp %edx, %eax /* Flags of src - dest; only the memmove build branches on them. */
-
-# ifdef USE_AS_MEMMOVE
- ja L(check_forward) /* Unsigned compare: addresses at or above 2 GiB must not look negative. */
-
-L(mm_len_0_or_more_backward):
-/* Now do checks for lengths. We do [0..16], [17..32], [33..64], [65..128]
- separately. */
- cmp $16, %ecx
- jbe L(mm_len_0_16_bytes_backward)
-
- cmpl $32, %ecx
- ja L(mm_len_32_or_more_backward) /* The length is unsigned, so ja rather than jg. */
-
-/* Copy [17..32] and return. All loads precede the stores, so overlap is harmless. */
- movdqu (%eax), %xmm0
- movdqu -16(%eax, %ecx), %xmm1
- movdqu %xmm0, (%edx)
- movdqu %xmm1, -16(%edx, %ecx)
- jmp L(return)
-
-L(mm_len_32_or_more_backward):
- cmpl $64, %ecx
- ja L(mm_len_64_or_more_backward)
-
-/* Copy [33..64] and return. */
- movdqu (%eax), %xmm0
- movdqu 16(%eax), %xmm1
- movdqu -16(%eax, %ecx), %xmm2
- movdqu -32(%eax, %ecx), %xmm3
- movdqu %xmm0, (%edx)
- movdqu %xmm1, 16(%edx)
- movdqu %xmm2, -16(%edx, %ecx)
- movdqu %xmm3, -32(%edx, %ecx)
- jmp L(return)
-
-L(mm_len_64_or_more_backward):
- cmpl $128, %ecx
- ja L(mm_len_128_or_more_backward)
-
-/* Copy [65..128] and return. */
- movdqu (%eax), %xmm0
- movdqu 16(%eax), %xmm1
- movdqu 32(%eax), %xmm2
- movdqu 48(%eax), %xmm3
- movdqu -64(%eax, %ecx), %xmm4
- movdqu -48(%eax, %ecx), %xmm5
- movdqu -32(%eax, %ecx), %xmm6
- movdqu -16(%eax, %ecx), %xmm7
- movdqu %xmm0, (%edx)
- movdqu %xmm1, 16(%edx)
- movdqu %xmm2, 32(%edx)
- movdqu %xmm3, 48(%edx)
- movdqu %xmm4, -64(%edx, %ecx)
- movdqu %xmm5, -48(%edx, %ecx)
- movdqu %xmm6, -32(%edx, %ecx)
- movdqu %xmm7, -16(%edx, %ecx)
- jmp L(return)
-
-L(mm_len_128_or_more_backward):
- add %ecx, %eax
- cmp %edx, %eax
- movl SRC(%esp), %eax /* Reload src; mov leaves the flags alone. */
- jbe L(forward) /* src + len <= dest (unsigned): no overlap, take the plain forward copy. */
- PUSH (%esi)
- PUSH (%edi)
- PUSH (%ebx)
-
-/* Aligning the address of destination. XMM4-XMM7 keep the first 64 source bytes for the final stores. */
- movdqu (%eax), %xmm4
- movdqu 16(%eax), %xmm5
- movdqu 32(%eax), %xmm6
- movdqu 48(%eax), %xmm7
- leal (%edx, %ecx), %esi /* ESI = end of destination. */
- movdqu -16(%eax, %ecx), %xmm0
- subl $16, %esp
- movdqu %xmm0, (%esp) /* Park the last 16 source bytes on the stack. */
- mov %ecx, %edi /* EDI = length, for the cache-size test. */
- movl %esi, %ecx
- andl $-16, %ecx /* ECX = destination end rounded down to 16. */
- leal (%ecx), %ebx
- subl %edx, %ebx /* EBX = bytes from dest up to that aligned end. */
- leal (%eax, %ebx), %eax /* EAX = source address matching ECX. */
- shrl $6, %ebx /* EBX = number of 64-byte iterations. */
-
-# ifdef SHARED_CACHE_SIZE_HALF
- cmp $SHARED_CACHE_SIZE_HALF, %edi
-# else
-# ifdef SHARED
- PUSH (%ebx) /* EBX holds the loop count; save it around the GOT lookup. */
- SETUP_PIC_REG (bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %edi
- POP (%ebx) /* pop does not disturb the flags tested below. */
-# else
- cmp __x86_shared_cache_size_half, %edi
-# endif
-# endif
- jae L(mm_large_page_loop_backward) /* Large copies use non-temporal stores. */
-
- .p2align 4
-L(mm_main_loop_backward):
-
- prefetcht0 -128(%eax)
-
- movdqu -64(%eax), %xmm0
- movdqu -48(%eax), %xmm1
- movdqu -32(%eax), %xmm2
- movdqu -16(%eax), %xmm3
- movaps %xmm0, -64(%ecx)
- subl $64, %eax
- movaps %xmm1, -48(%ecx)
- movaps %xmm2, -32(%ecx)
- movaps %xmm3, -16(%ecx)
- subl $64, %ecx
- sub $1, %ebx
- jnz L(mm_main_loop_backward)
- movdqu (%esp), %xmm0 /* Recover the saved last 16 bytes. */
- addl $16, %esp
- movdqu %xmm0, -16(%esi)
- movdqu %xmm4, (%edx)
- movdqu %xmm5, 16(%edx)
- movdqu %xmm6, 32(%edx)
- movdqu %xmm7, 48(%edx)
- POP (%ebx)
- jmp L(mm_return_pop_all)
-
-/* Copy [0..16] and return. */
-L(mm_len_0_16_bytes_backward):
- testb $24, %cl /* Bit 3 or 4 set: length is 8..16. */
- jnz L(mm_len_9_16_bytes_backward)
- testb $4, %cl /* Bit 2 set: length is 4..7. */
- .p2align 4,,5
- jnz L(mm_len_5_8_bytes_backward)
- testl %ecx, %ecx
- .p2align 4,,2
- je L(return)
- testb $2, %cl /* Bit 1 set: length is 2..3. */
- .p2align 4,,1
- jne L(mm_len_3_4_bytes_backward)
- movzbl -1(%eax,%ecx), %ebx /* Length is 1 here. */
- movzbl (%eax), %eax
- movb %bl, -1(%edx,%ecx)
- movb %al, (%edx)
- jmp L(return)
-
-L(mm_len_3_4_bytes_backward):
- movzwl -2(%eax,%ecx), %ebx
- movzwl (%eax), %eax
- movw %bx, -2(%edx,%ecx)
- movw %ax, (%edx)
- jmp L(return)
-
-L(mm_len_9_16_bytes_backward):
- PUSH (%esi)
- movl -4(%eax,%ecx), %ebx
- movl -8(%eax,%ecx), %esi
- movl %ebx, -4(%edx,%ecx)
- movl %esi, -8(%edx,%ecx)
- subl $8, %ecx /* Last 8 bytes done; dispatch again on what remains. */
- POP (%esi)
- jmp L(mm_len_0_16_bytes_backward)
-
-L(mm_len_5_8_bytes_backward):
- movl (%eax), %ebx
- movl -4(%eax,%ecx), %eax
- movl %ebx, (%edx)
- movl %eax, -4(%edx,%ecx)
- jmp L(return)
-
-/* Big length copy backward part. */
- .p2align 4
-L(mm_large_page_loop_backward):
- movdqu -64(%eax), %xmm0
- movdqu -48(%eax), %xmm1
- movdqu -32(%eax), %xmm2
- movdqu -16(%eax), %xmm3
- movntdq %xmm0, -64(%ecx)
- subl $64, %eax
- movntdq %xmm1, -48(%ecx)
- movntdq %xmm2, -32(%ecx)
- movntdq %xmm3, -16(%ecx)
- subl $64, %ecx
- sub $1, %ebx
- jnz L(mm_large_page_loop_backward)
- sfence /* Order the non-temporal stores before the ordinary stores below. */
- movdqu (%esp), %xmm0
- addl $16, %esp
- movdqu %xmm0, -16(%esi)
- movdqu %xmm4, (%edx)
- movdqu %xmm5, 16(%edx)
- movdqu %xmm6, 32(%edx)
- movdqu %xmm7, 48(%edx)
- POP (%ebx)
- jmp L(mm_return_pop_all)
-
-L(check_forward):
- add %edx, %ecx
- cmp %eax, %ecx
- movl LEN(%esp), %ecx /* Reload the length; mov leaves the flags alone. */
- jbe L(forward) /* dest + len <= src (unsigned): no overlap, take the plain forward copy. */
-
-/* Now do checks for lengths. We do [0..16], [17..32], [33..64], [65..128]
- separately. */
- cmp $16, %ecx
- jbe L(mm_len_0_16_bytes_forward)
-
- cmpl $32, %ecx
- ja L(mm_len_32_or_more_forward)
-
-/* Copy [17..32] and return. */
- movdqu (%eax), %xmm0
- movdqu -16(%eax, %ecx), %xmm1
- movdqu %xmm0, (%edx)
- movdqu %xmm1, -16(%edx, %ecx)
- jmp L(return)
-
-L(mm_len_32_or_more_forward):
- cmpl $64, %ecx
- ja L(mm_len_64_or_more_forward)
-
-/* Copy [33..64] and return. */
- movdqu (%eax), %xmm0
- movdqu 16(%eax), %xmm1
- movdqu -16(%eax, %ecx), %xmm2
- movdqu -32(%eax, %ecx), %xmm3
- movdqu %xmm0, (%edx)
- movdqu %xmm1, 16(%edx)
- movdqu %xmm2, -16(%edx, %ecx)
- movdqu %xmm3, -32(%edx, %ecx)
- jmp L(return)
-
-L(mm_len_64_or_more_forward):
- cmpl $128, %ecx
- ja L(mm_len_128_or_more_forward)
-
-/* Copy [65..128] and return. */
- movdqu (%eax), %xmm0
- movdqu 16(%eax), %xmm1
- movdqu 32(%eax), %xmm2
- movdqu 48(%eax), %xmm3
- movdqu -64(%eax, %ecx), %xmm4
- movdqu -48(%eax, %ecx), %xmm5
- movdqu -32(%eax, %ecx), %xmm6
- movdqu -16(%eax, %ecx), %xmm7
- movdqu %xmm0, (%edx)
- movdqu %xmm1, 16(%edx)
- movdqu %xmm2, 32(%edx)
- movdqu %xmm3, 48(%edx)
- movdqu %xmm4, -64(%edx, %ecx)
- movdqu %xmm5, -48(%edx, %ecx)
- movdqu %xmm6, -32(%edx, %ecx)
- movdqu %xmm7, -16(%edx, %ecx)
- jmp L(return)
-
-L(mm_len_128_or_more_forward):
- PUSH (%esi)
- PUSH (%edi)
- PUSH (%ebx)
-
-/* Aligning the address of destination. XMM4-XMM7 keep the last 64 source bytes for the final stores. */
- movdqu -16(%eax, %ecx), %xmm4
- movdqu -32(%eax, %ecx), %xmm5
- movdqu -48(%eax, %ecx), %xmm6
- movdqu -64(%eax, %ecx), %xmm7
- leal (%edx, %ecx), %esi /* ESI = end of destination. */
- movdqu (%eax), %xmm0
- subl $16, %esp
- movdqu %xmm0, (%esp) /* Park the first 16 source bytes on the stack. */
- mov %ecx, %edi /* EDI = length, for the cache-size test. */
- leal 16(%edx), %ecx
- andl $-16, %ecx /* ECX = first 16-byte boundary above dest. */
- movl %ecx, %ebx
- subl %edx, %ebx /* EBX = distance from dest to that boundary. */
- addl %ebx, %eax /* EAX = source address matching ECX. */
- movl %esi, %ebx
- subl %ecx, %ebx
- shrl $6, %ebx /* EBX = number of 64-byte iterations. */
-
-# ifdef SHARED_CACHE_SIZE_HALF
- cmp $SHARED_CACHE_SIZE_HALF, %edi
-# else
-# ifdef SHARED
- PUSH (%ebx) /* EBX holds the loop count; save it around the GOT lookup. */
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %edi
- POP (%ebx) /* pop does not disturb the flags tested below. */
-# else
- cmp __x86_shared_cache_size_half, %edi
-# endif
-# endif
- jae L(mm_large_page_loop_forward) /* Large copies use non-temporal stores. */
-
- .p2align 4
-L(mm_main_loop_forward):
-
- prefetcht0 128(%eax)
-
- movdqu (%eax), %xmm0
- movdqu 16(%eax), %xmm1
- movdqu 32(%eax), %xmm2
- movdqu 48(%eax), %xmm3
- movdqa %xmm0, (%ecx)
- addl $64, %eax
- movaps %xmm1, 16(%ecx)
- movaps %xmm2, 32(%ecx)
- movaps %xmm3, 48(%ecx)
- addl $64, %ecx
- sub $1, %ebx
- jnz L(mm_main_loop_forward)
- movdqu (%esp), %xmm0 /* Recover the saved first 16 bytes. */
- addl $16, %esp
- movdqu %xmm0, (%edx)
- movdqu %xmm4, -16(%esi)
- movdqu %xmm5, -32(%esi)
- movdqu %xmm6, -48(%esi)
- movdqu %xmm7, -64(%esi)
- POP (%ebx)
- jmp L(mm_return_pop_all)
-
-L(mm_len_0_16_bytes_forward):
- testb $24, %cl /* Bit 3 or 4 set: length is 8..16. */
- jne L(mm_len_9_16_bytes_forward)
- testb $4, %cl /* Bit 2 set: length is 4..7. */
- .p2align 4,,5
- jne L(mm_len_5_8_bytes_forward)
- testl %ecx, %ecx
- .p2align 4,,2
- je L(return)
- testb $2, %cl /* Bit 1 set: length is 2..3. */
- .p2align 4,,1
- jne L(mm_len_2_4_bytes_forward)
- movzbl -1(%eax,%ecx), %ebx /* Length is 1 here. */
- movzbl (%eax), %eax
- movb %bl, -1(%edx,%ecx)
- movb %al, (%edx)
- jmp L(return)
-
-L(mm_len_2_4_bytes_forward):
- movzwl -2(%eax,%ecx), %ebx
- movzwl (%eax), %eax
- movw %bx, -2(%edx,%ecx)
- movw %ax, (%edx)
- jmp L(return)
-
-L(mm_len_5_8_bytes_forward):
- movl (%eax), %ebx
- movl -4(%eax,%ecx), %eax
- movl %ebx, (%edx)
- movl %eax, -4(%edx,%ecx)
- jmp L(return)
-
-L(mm_len_9_16_bytes_forward):
- movq (%eax), %xmm0
- movq -8(%eax, %ecx), %xmm1
- movq %xmm0, (%edx)
- movq %xmm1, -8(%edx, %ecx)
- jmp L(return)
-
-L(mm_return_pop_all):
- movl %edx, %eax /* Return value: dest. */
- POP (%edi)
- POP (%esi)
- RETURN
-
-/* Big length copy forward part. */
- .p2align 4
-L(mm_large_page_loop_forward):
- movdqu (%eax), %xmm0
- movdqu 16(%eax), %xmm1
- movdqu 32(%eax), %xmm2
- movdqu 48(%eax), %xmm3
- movntdq %xmm0, (%ecx)
- addl $64, %eax
- movntdq %xmm1, 16(%ecx)
- movntdq %xmm2, 32(%ecx)
- movntdq %xmm3, 48(%ecx)
- addl $64, %ecx
- sub $1, %ebx
- jnz L(mm_large_page_loop_forward)
- sfence /* Order the non-temporal stores before the ordinary stores below. */
- movdqu (%esp), %xmm0
- addl $16, %esp
- movdqu %xmm0, (%edx)
- movdqu %xmm4, -16(%esi)
- movdqu %xmm5, -32(%esi)
- movdqu %xmm6, -48(%esi)
- movdqu %xmm7, -64(%esi)
- POP (%ebx)
- jmp L(mm_return_pop_all)
-# endif
-
-L(forward):
- cmp $16, %ecx
- jbe L(len_0_16_bytes)
-
-# ifdef SHARED_CACHE_SIZE_HALF
- cmp $SHARED_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_shared_cache_size_half, %ecx
-# endif
-# endif
- jae L(large_page)
-
- movdqu (%eax), %xmm0
- movdqu -16(%eax, %ecx), %xmm1
- cmpl $32, %ecx
- movdqu %xmm0, (%edx)
- movdqu %xmm1, -16(%edx, %ecx)
- jbe L(return) /* Up to 32 bytes: first and last 16 cover everything. */
-
- movdqu 16(%eax), %xmm0
- movdqu -32(%eax, %ecx), %xmm1
- cmpl $64, %ecx
- movdqu %xmm0, 16(%edx)
- movdqu %xmm1, -32(%edx, %ecx)
- jbe L(return)
-
- movdqu 32(%eax), %xmm0
- movdqu 48(%eax), %xmm1
- movdqu -48(%eax, %ecx), %xmm2
- movdqu -64(%eax, %ecx), %xmm3
- cmpl $128, %ecx
- movdqu %xmm0, 32(%edx)
- movdqu %xmm1, 48(%edx)
- movdqu %xmm2, -48(%edx, %ecx)
- movdqu %xmm3, -64(%edx, %ecx)
- jbe L(return)
-
-/* Now the main loop: we align the address of the destination. */
- leal 64(%edx), %ebx
- andl $-64, %ebx /* EBX = first 64-byte boundary above dest. */
-
- addl %edx, %ecx
- andl $-64, %ecx /* ECX = destination end rounded down to 64. */
-
- subl %edx, %eax /* EAX = src - dest, so (%ebx,%eax) is the source for dest address EBX. */
-
-/* We should stop two iterations before the termination
- (in order not to misprefetch). */
- subl $64, %ecx
- cmpl %ebx, %ecx
- je L(main_loop_just_one_iteration)
-
- subl $64, %ecx
- cmpl %ebx, %ecx
- je L(main_loop_last_two_iterations)
-
- .p2align 4
-L(main_loop_cache):
-
- prefetcht0 128(%ebx, %eax)
-
- movdqu (%ebx, %eax), %xmm0
- movdqu 16(%ebx, %eax), %xmm1
- movdqu 32(%ebx, %eax), %xmm2
- movdqu 48(%ebx, %eax), %xmm3
- movdqa %xmm0, (%ebx)
- movaps %xmm1, 16(%ebx)
- movaps %xmm2, 32(%ebx)
- movaps %xmm3, 48(%ebx)
- lea 64(%ebx), %ebx
- cmpl %ebx, %ecx
- jne L(main_loop_cache)
-
-L(main_loop_last_two_iterations):
- movdqu (%ebx, %eax), %xmm0
- movdqu 16(%ebx, %eax), %xmm1
- movdqu 32(%ebx, %eax), %xmm2
- movdqu 48(%ebx, %eax), %xmm3
- movdqu 64(%ebx, %eax), %xmm4
- movdqu 80(%ebx, %eax), %xmm5
- movdqu 96(%ebx, %eax), %xmm6
- movdqu 112(%ebx, %eax), %xmm7
- movdqa %xmm0, (%ebx)
- movaps %xmm1, 16(%ebx)
- movaps %xmm2, 32(%ebx)
- movaps %xmm3, 48(%ebx)
- movaps %xmm4, 64(%ebx)
- movaps %xmm5, 80(%ebx)
- movaps %xmm6, 96(%ebx)
- movaps %xmm7, 112(%ebx)
- jmp L(return)
-
-L(main_loop_just_one_iteration):
- movdqu (%ebx, %eax), %xmm0
- movdqu 16(%ebx, %eax), %xmm1
- movdqu 32(%ebx, %eax), %xmm2
- movdqu 48(%ebx, %eax), %xmm3
- movdqa %xmm0, (%ebx)
- movaps %xmm1, 16(%ebx)
- movaps %xmm2, 32(%ebx)
- movaps %xmm3, 48(%ebx)
- jmp L(return)
-
-L(large_page):
- movdqu (%eax), %xmm0
- movdqu 16(%eax), %xmm1
- movdqu 32(%eax), %xmm2
- movdqu 48(%eax), %xmm3
- movdqu -64(%eax, %ecx), %xmm4
- movdqu -48(%eax, %ecx), %xmm5
- movdqu -32(%eax, %ecx), %xmm6
- movdqu -16(%eax, %ecx), %xmm7
- movdqu %xmm0, (%edx)
- movdqu %xmm1, 16(%edx)
- movdqu %xmm2, 32(%edx)
- movdqu %xmm3, 48(%edx)
- movdqu %xmm4, -64(%edx, %ecx)
- movdqu %xmm5, -48(%edx, %ecx)
- movdqu %xmm6, -32(%edx, %ecx)
- movdqu %xmm7, -16(%edx, %ecx)
-
- movdqu 64(%eax), %xmm0
- movdqu 80(%eax), %xmm1
- movdqu 96(%eax), %xmm2
- movdqu 112(%eax), %xmm3
- movdqu -128(%eax, %ecx), %xmm4
- movdqu -112(%eax, %ecx), %xmm5
- movdqu -96(%eax, %ecx), %xmm6
- movdqu -80(%eax, %ecx), %xmm7
- movdqu %xmm0, 64(%edx)
- movdqu %xmm1, 80(%edx)
- movdqu %xmm2, 96(%edx)
- movdqu %xmm3, 112(%edx)
- movdqu %xmm4, -128(%edx, %ecx)
- movdqu %xmm5, -112(%edx, %ecx)
- movdqu %xmm6, -96(%edx, %ecx)
- movdqu %xmm7, -80(%edx, %ecx)
-
-/* Now the main loop with non temporal stores. We align
- the address of the destination. */
- leal 128(%edx), %ebx
- andl $-128, %ebx /* EBX = first 128-byte boundary above dest. */
-
- addl %edx, %ecx
- andl $-128, %ecx /* ECX = destination end rounded down to 128. */
-
- subl %edx, %eax /* EAX = src - dest, as in the cached loop. */
-
- .p2align 4
-L(main_loop_large_page):
- movdqu (%ebx, %eax), %xmm0
- movdqu 16(%ebx, %eax), %xmm1
- movdqu 32(%ebx, %eax), %xmm2
- movdqu 48(%ebx, %eax), %xmm3
- movdqu 64(%ebx, %eax), %xmm4
- movdqu 80(%ebx, %eax), %xmm5
- movdqu 96(%ebx, %eax), %xmm6
- movdqu 112(%ebx, %eax), %xmm7
- movntdq %xmm0, (%ebx)
- movntdq %xmm1, 16(%ebx)
- movntdq %xmm2, 32(%ebx)
- movntdq %xmm3, 48(%ebx)
- movntdq %xmm4, 64(%ebx)
- movntdq %xmm5, 80(%ebx)
- movntdq %xmm6, 96(%ebx)
- movntdq %xmm7, 112(%ebx)
- lea 128(%ebx), %ebx
- cmpl %ebx, %ecx
- jne L(main_loop_large_page)
- sfence
- jmp L(return)
-
-L(len_0_16_bytes):
- testb $24, %cl /* Bit 3 or 4 set: length is 8..16. */
- jne L(len_9_16_bytes)
- testb $4, %cl /* Bit 2 set: length is 4..7. */
- .p2align 4,,5
- jne L(len_5_8_bytes)
- testl %ecx, %ecx
- .p2align 4,,2
- je L(return)
- movzbl (%eax), %ebx /* Length 1..3: always copy the first byte. */
- testb $2, %cl
- movb %bl, (%edx)
- je L(return)
- movzwl -2(%eax,%ecx), %ebx /* Length 2..3: also copy the last two bytes. */
- movw %bx, -2(%edx,%ecx)
- jmp L(return)
-
-L(len_9_16_bytes):
- movq (%eax), %xmm0
- movq -8(%eax, %ecx), %xmm1
- movq %xmm0, (%edx)
- movq %xmm1, -8(%edx, %ecx)
- jmp L(return)
-
-L(len_5_8_bytes):
- movl (%eax), %ebx
- movl %ebx, (%edx)
- movl -4(%eax,%ecx), %ebx
- movl %ebx, -4(%edx,%ecx)
-
-L(return):
- movl %edx, %eax /* Return value: dest. */
-# if !defined USE_AS_BCOPY && defined USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax /* mempcpy returns dest + len. */
-# endif
- RETURN
-
-END (MEMCPY)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
deleted file mode 100644
index 687e083147..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
+++ /dev/null
@@ -1,1809 +0,0 @@
-/* memcpy with SSSE3 and REP string.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-#if IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
-
-#include "asm-syntax.h"
-
-#ifndef MEMCPY /* Default symbol names, used only when MEMCPY has not been defined already. */
-# define MEMCPY __memcpy_ssse3_rep
-# define MEMCPY_CHK __memcpy_chk_ssse3_rep
-#endif
-/* SRC, DEST and LEN are offsets from ESP, valid once ENTRANCE has run. */
-#ifdef USE_AS_BCOPY /* In the bcopy build the source is the first stack argument. */
-# define SRC PARMS
-# define DEST SRC+4
-# define LEN DEST+4
-#else /* Otherwise the destination is the first stack argument. */
-# define DEST PARMS
-# define SRC DEST+4
-# define LEN SRC+4
-#endif
-/* CFI_PUSH: unwind annotations for a 4-byte push of REG (CFA moves by 4, REG saved at offset 0). */
-#define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-/* CFI_POP: unwind annotations for the matching pop (CFA moves back, REG restored). */
-#define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-/* PUSH/POP: the instruction together with its unwind annotations. */
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#ifdef SHARED
-# define PARMS 8 /* Preserve EBX: skip the return address and the EBX slot pushed by ENTRANCE. */
-# define ENTRANCE PUSH (%ebx);
-# define RETURN_END POP (%ebx); ret
-# define RETURN RETURN_END; CFI_PUSH (%ebx)
-# define JMPTBL(I, B) I - B /* Table entries are offsets relative to the base B. */
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. INDEX is a register that contains
- the index into the jump table. SCALE is the scale of INDEX. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- /* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ebx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ebx,INDEX,SCALE), %ebx; \
- /* EBX now holds the target address. Go. */ \
- jmp *%ebx
-/* Split form of the macro above: _VALUE adds TABLE - . to EBX (which presumably must already hold the PC; confirm at call sites), _TAIL indexes the table and jumps. */
-# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
- addl $(TABLE - .), %ebx
-
-# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
- addl (%ebx,INDEX,SCALE), %ebx; \
- /* EBX now holds the target address. Go. */ \
- jmp *%ebx
-#else
-# define PARMS 4 /* Only the return address precedes the arguments. */
-# define ENTRANCE /* Nothing to save. */
-# define RETURN_END ret
-# define RETURN RETURN_END
-# define JMPTBL(I, B) I /* Table entries are absolute addresses. */
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute addresses. INDEX is a register that contains the index into
- the jump table. SCALE is the scale of INDEX. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-/* Without PIC there is nothing to precompute, so _VALUE expands to nothing and _TAIL is the plain indirect jump. */
-# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
-
-# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-#endif
-
- .section .text.ssse3,"ax",@progbits
-#if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK) /* Checked entry point: validate the sizes, then continue into MEMCPY. */
- movl 12(%esp), %eax /* Third stack argument (presumably the copy length; confirm against the *_chk prototype). */
- cmpl %eax, 16(%esp) /* Compare the fourth stack argument against it. */
- jb HIDDEN_JUMPTARGET (__chk_fail) /* Fourth < third (unsigned): jump to __chk_fail. */
-END (MEMCPY_CHK) /* No ret here: execution continues into the MEMCPY entry assembled right after. */
-#endif
-ENTRY (MEMCPY)
- ENTRANCE
- movl LEN(%esp), %ecx
- movl SRC(%esp), %eax
- movl DEST(%esp), %edx
-
-#ifdef USE_AS_MEMMOVE
- cmp %eax, %edx
- jb L(copy_forward)
- je L(fwd_write_0bytes)
- cmp $48, %ecx
- jb L(bk_write_less48bytes)
- add %ecx, %eax
- cmp %eax, %edx
- movl SRC(%esp), %eax
- jb L(copy_backward)
-
-L(copy_forward):
-#endif
- cmp $48, %ecx
- jae L(48bytesormore)
-
-L(fwd_write_less32bytes):
-#ifndef USE_AS_MEMMOVE
- cmp %dl, %al
- jb L(bk_write)
-#endif
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-#ifndef USE_AS_MEMMOVE
-L(bk_write):
- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-#endif
-
- ALIGN (4)
-/* ECX > 32 and EDX is 4 byte aligned. */
-L(48bytesormore):
- movdqu (%eax), %xmm0
- PUSH (%edi)
- movl %edx, %edi
- and $-16, %edx
- PUSH (%esi)
- cfi_remember_state
- add $16, %edx
- movl %edi, %esi
- sub %edx, %edi
- add %edi, %ecx
- sub %edi, %eax
-
-#ifdef SHARED_CACHE_SIZE_HALF
- cmp $SHARED_CACHE_SIZE_HALF, %ecx
-#else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_shared_cache_size_half, %ecx
-# endif
-#endif
-
- mov %eax, %edi
- jae L(large_page)
- and $0xf, %edi
- jz L(shl_0)
-
- BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
-
- ALIGN (4)
-L(shl_0):
- movdqu %xmm0, (%esi)
- xor %edi, %edi
- cmp $127, %ecx
- ja L(shl_0_gobble)
- lea -32(%ecx), %ecx
-L(shl_0_loop):
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
-L(shl_0_end):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- add %edi, %eax
- POP (%esi)
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
-L(shl_0_gobble):
-
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi
-# else
- mov __x86_data_cache_size_half, %edi
-# endif
-#endif
- mov %edi, %esi
- shr $3, %esi
- sub %esi, %edi
- cmp %edi, %ecx
- jae L(shl_0_gobble_mem_start)
- sub $128, %ecx
- ALIGN (4)
-L(shl_0_gobble_cache_loop):
- movdqa (%eax), %xmm0
- movaps 0x10(%eax), %xmm1
- movaps 0x20(%eax), %xmm2
- movaps 0x30(%eax), %xmm3
- movaps 0x40(%eax), %xmm4
- movaps 0x50(%eax), %xmm5
- movaps 0x60(%eax), %xmm6
- movaps 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movaps %xmm1, 0x10(%edx)
- movaps %xmm2, 0x20(%edx)
- movaps %xmm3, 0x30(%edx)
- movaps %xmm4, 0x40(%edx)
- movaps %xmm5, 0x50(%edx)
- movaps %xmm6, 0x60(%edx)
- movaps %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
-
- jae L(shl_0_gobble_cache_loop)
- add $0x80, %ecx
- cmp $0x40, %ecx
- jb L(shl_0_cache_less_64bytes)
-
- movdqa (%eax), %xmm0
- sub $0x40, %ecx
- movdqa 0x10(%eax), %xmm1
-
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
-
- movdqa 0x20(%eax), %xmm0
- movdqa 0x30(%eax), %xmm1
- add $0x40, %eax
-
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm1, 0x30(%edx)
- add $0x40, %edx
-L(shl_0_cache_less_64bytes):
- cmp $0x20, %ecx
- jb L(shl_0_cache_less_32bytes)
- movdqa (%eax), %xmm0
- sub $0x20, %ecx
- movdqa 0x10(%eax), %xmm1
- add $0x20, %eax
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- add $0x20, %edx
-L(shl_0_cache_less_32bytes):
- cmp $0x10, %ecx
- jb L(shl_0_cache_less_16bytes)
- sub $0x10, %ecx
- movdqa (%eax), %xmm0
- add $0x10, %eax
- movdqa %xmm0, (%edx)
- add $0x10, %edx
-L(shl_0_cache_less_16bytes):
- add %ecx, %edx
- add %ecx, %eax
- POP (%esi)
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_0_gobble_mem_start):
- cmp %al, %dl
- je L(copy_page_by_rep)
- sub $128, %ecx
-L(shl_0_gobble_mem_loop):
- prefetchnta 0x1c0(%eax)
- prefetchnta 0x280(%eax)
- prefetchnta 0x1c0(%edx)
- prefetchnta 0x280(%edx)
-
- movdqa (%eax), %xmm0
- movaps 0x10(%eax), %xmm1
- movaps 0x20(%eax), %xmm2
- movaps 0x30(%eax), %xmm3
- movaps 0x40(%eax), %xmm4
- movaps 0x50(%eax), %xmm5
- movaps 0x60(%eax), %xmm6
- movaps 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
- sub $0x80, %ecx
- movdqa %xmm0, (%edx)
- movaps %xmm1, 0x10(%edx)
- movaps %xmm2, 0x20(%edx)
- movaps %xmm3, 0x30(%edx)
- movaps %xmm4, 0x40(%edx)
- movaps %xmm5, 0x50(%edx)
- movaps %xmm6, 0x60(%edx)
- movaps %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
-
- jae L(shl_0_gobble_mem_loop)
- add $0x80, %ecx
- cmp $0x40, %ecx
- jb L(shl_0_mem_less_64bytes)
-
- movdqa (%eax), %xmm0
- sub $0x40, %ecx
- movdqa 0x10(%eax), %xmm1
-
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
-
- movdqa 0x20(%eax), %xmm0
- movdqa 0x30(%eax), %xmm1
- add $0x40, %eax
-
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm1, 0x30(%edx)
- add $0x40, %edx
-L(shl_0_mem_less_64bytes):
- cmp $0x20, %ecx
- jb L(shl_0_mem_less_32bytes)
- movdqa (%eax), %xmm0
- sub $0x20, %ecx
- movdqa 0x10(%eax), %xmm1
- add $0x20, %eax
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- add $0x20, %edx
-L(shl_0_mem_less_32bytes):
- cmp $0x10, %ecx
- jb L(shl_0_mem_less_16bytes)
- sub $0x10, %ecx
- movdqa (%eax), %xmm0
- add $0x10, %eax
- movdqa %xmm0, (%edx)
- add $0x10, %edx
-L(shl_0_mem_less_16bytes):
- add %ecx, %edx
- add %ecx, %eax
- POP (%esi)
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_1):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $1, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_1_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_1_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_1_loop)
-
-L(shl_1_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 1(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_2):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $2, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_2_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_2_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_2_loop)
-
-L(shl_2_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 2(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_3):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $3, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_3_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_3_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_3_loop)
-
-L(shl_3_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 3(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_4):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $4, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_4_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_4_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_4_loop)
-
-L(shl_4_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 4(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_5):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $5, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_5_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_5_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_5_loop)
-
-L(shl_5_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 5(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_6):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $6, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_6_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_6_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_6_loop)
-
-L(shl_6_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 6(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_7):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $7, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_7_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_7_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_7_loop)
-
-L(shl_7_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 7(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_8):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $8, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_8_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_8_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_8_loop)
-
-L(shl_8_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 8(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_9):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $9, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_9_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_9_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_9_loop)
-
-L(shl_9_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 9(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_10):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $10, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_10_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_10_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_10_loop)
-
-L(shl_10_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 10(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_11):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $11, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_11_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_11_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_11_loop)
-
-L(shl_11_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 11(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_12):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $12, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_12_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_12_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_12_loop)
-
-L(shl_12_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 12(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_13):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $13, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_13_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_13_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_13_loop)
-
-L(shl_13_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 13(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_14):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $14, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_14_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_14_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_14_loop)
-
-L(shl_14_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 14(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(shl_15):
- BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- sub $15, %eax
- movaps (%eax), %xmm1
- xor %edi, %edi
- sub $32, %ecx
- movdqu %xmm0, (%esi)
- POP (%esi)
-L(shl_15_loop):
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(shl_15_end)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(shl_15_loop)
-
-L(shl_15_end):
- add $32, %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 15(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
-
- ALIGN (4)
-L(fwd_write_44bytes):
- movl -44(%eax), %ecx
- movl %ecx, -44(%edx)
-L(fwd_write_40bytes):
- movl -40(%eax), %ecx
- movl %ecx, -40(%edx)
-L(fwd_write_36bytes):
- movl -36(%eax), %ecx
- movl %ecx, -36(%edx)
-L(fwd_write_32bytes):
- movl -32(%eax), %ecx
- movl %ecx, -32(%edx)
-L(fwd_write_28bytes):
- movl -28(%eax), %ecx
- movl %ecx, -28(%edx)
-L(fwd_write_24bytes):
- movl -24(%eax), %ecx
- movl %ecx, -24(%edx)
-L(fwd_write_20bytes):
- movl -20(%eax), %ecx
- movl %ecx, -20(%edx)
-L(fwd_write_16bytes):
- movl -16(%eax), %ecx
- movl %ecx, -16(%edx)
-L(fwd_write_12bytes):
- movl -12(%eax), %ecx
- movl %ecx, -12(%edx)
-L(fwd_write_8bytes):
- movl -8(%eax), %ecx
- movl %ecx, -8(%edx)
-L(fwd_write_4bytes):
- movl -4(%eax), %ecx
- movl %ecx, -4(%edx)
-L(fwd_write_0bytes):
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(fwd_write_5bytes):
- movl -5(%eax), %ecx
- movl -4(%eax), %eax
- movl %ecx, -5(%edx)
- movl %eax, -4(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(fwd_write_45bytes):
- movl -45(%eax), %ecx
- movl %ecx, -45(%edx)
-L(fwd_write_41bytes):
- movl -41(%eax), %ecx
- movl %ecx, -41(%edx)
-L(fwd_write_37bytes):
- movl -37(%eax), %ecx
- movl %ecx, -37(%edx)
-L(fwd_write_33bytes):
- movl -33(%eax), %ecx
- movl %ecx, -33(%edx)
-L(fwd_write_29bytes):
- movl -29(%eax), %ecx
- movl %ecx, -29(%edx)
-L(fwd_write_25bytes):
- movl -25(%eax), %ecx
- movl %ecx, -25(%edx)
-L(fwd_write_21bytes):
- movl -21(%eax), %ecx
- movl %ecx, -21(%edx)
-L(fwd_write_17bytes):
- movl -17(%eax), %ecx
- movl %ecx, -17(%edx)
-L(fwd_write_13bytes):
- movl -13(%eax), %ecx
- movl %ecx, -13(%edx)
-L(fwd_write_9bytes):
- movl -9(%eax), %ecx
- movl %ecx, -9(%edx)
- movl -5(%eax), %ecx
- movl %ecx, -5(%edx)
-L(fwd_write_1bytes):
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(fwd_write_46bytes):
- movl -46(%eax), %ecx
- movl %ecx, -46(%edx)
-L(fwd_write_42bytes):
- movl -42(%eax), %ecx
- movl %ecx, -42(%edx)
-L(fwd_write_38bytes):
- movl -38(%eax), %ecx
- movl %ecx, -38(%edx)
-L(fwd_write_34bytes):
- movl -34(%eax), %ecx
- movl %ecx, -34(%edx)
-L(fwd_write_30bytes):
- movl -30(%eax), %ecx
- movl %ecx, -30(%edx)
-L(fwd_write_26bytes):
- movl -26(%eax), %ecx
- movl %ecx, -26(%edx)
-L(fwd_write_22bytes):
- movl -22(%eax), %ecx
- movl %ecx, -22(%edx)
-L(fwd_write_18bytes):
- movl -18(%eax), %ecx
- movl %ecx, -18(%edx)
-L(fwd_write_14bytes):
- movl -14(%eax), %ecx
- movl %ecx, -14(%edx)
-L(fwd_write_10bytes):
- movl -10(%eax), %ecx
- movl %ecx, -10(%edx)
-L(fwd_write_6bytes):
- movl -6(%eax), %ecx
- movl %ecx, -6(%edx)
-L(fwd_write_2bytes):
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(fwd_write_47bytes):
- movl -47(%eax), %ecx
- movl %ecx, -47(%edx)
-L(fwd_write_43bytes):
- movl -43(%eax), %ecx
- movl %ecx, -43(%edx)
-L(fwd_write_39bytes):
- movl -39(%eax), %ecx
- movl %ecx, -39(%edx)
-L(fwd_write_35bytes):
- movl -35(%eax), %ecx
- movl %ecx, -35(%edx)
-L(fwd_write_31bytes):
- movl -31(%eax), %ecx
- movl %ecx, -31(%edx)
-L(fwd_write_27bytes):
- movl -27(%eax), %ecx
- movl %ecx, -27(%edx)
-L(fwd_write_23bytes):
- movl -23(%eax), %ecx
- movl %ecx, -23(%edx)
-L(fwd_write_19bytes):
- movl -19(%eax), %ecx
- movl %ecx, -19(%edx)
-L(fwd_write_15bytes):
- movl -15(%eax), %ecx
- movl %ecx, -15(%edx)
-L(fwd_write_11bytes):
- movl -11(%eax), %ecx
- movl %ecx, -11(%edx)
-L(fwd_write_7bytes):
- movl -7(%eax), %ecx
- movl %ecx, -7(%edx)
-L(fwd_write_3bytes):
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-#ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-#endif
- RETURN_END
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(large_page):
- movdqu (%eax), %xmm1
- movdqu %xmm0, (%esi)
- movntdq %xmm1, (%edx)
- add $0x10, %eax
- add $0x10, %edx
- sub $0x10, %ecx
- cmp %al, %dl
- je L(copy_page_by_rep)
-L(large_page_loop_init):
- POP (%esi)
- sub $0x80, %ecx
- POP (%edi)
-L(large_page_loop):
- prefetchnta 0x1c0(%eax)
- prefetchnta 0x280(%eax)
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- movdqu 0x20(%eax), %xmm2
- movdqu 0x30(%eax), %xmm3
- movdqu 0x40(%eax), %xmm4
- movdqu 0x50(%eax), %xmm5
- movdqu 0x60(%eax), %xmm6
- movdqu 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
- lfence
- sub $0x80, %ecx
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- movntdq %xmm2, 0x20(%edx)
- movntdq %xmm3, 0x30(%edx)
- movntdq %xmm4, 0x40(%edx)
- movntdq %xmm5, 0x50(%edx)
- movntdq %xmm6, 0x60(%edx)
- movntdq %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
- jae L(large_page_loop)
- add $0x80, %ecx
- cmp $0x40, %ecx
- jb L(large_page_less_64bytes)
-
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- movdqu 0x20(%eax), %xmm2
- movdqu 0x30(%eax), %xmm3
- lea 0x40(%eax), %eax
-
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- movntdq %xmm2, 0x20(%edx)
- movntdq %xmm3, 0x30(%edx)
- lea 0x40(%edx), %edx
- sub $0x40, %ecx
-L(large_page_less_64bytes):
- cmp $32, %ecx
- jb L(large_page_less_32bytes)
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- lea 0x20(%eax), %eax
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- lea 0x20(%edx), %edx
- sub $0x20, %ecx
-L(large_page_less_32bytes):
- add %ecx, %edx
- add %ecx, %eax
- sfence
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- cfi_restore_state
- cfi_remember_state
- ALIGN (4)
-L(copy_page_by_rep):
- mov %eax, %esi
- mov %edx, %edi
- mov %ecx, %edx
- shr $2, %ecx
- and $3, %edx
- rep movsl
- jz L(copy_page_by_rep_exit)
- cmp $2, %edx
- jb L(copy_page_by_rep_left_1)
- movzwl (%esi), %eax
- movw %ax, (%edi)
- add $2, %esi
- add $2, %edi
- sub $2, %edx
- jz L(copy_page_by_rep_exit)
-L(copy_page_by_rep_left_1):
- movzbl (%esi), %eax
- movb %al, (%edi)
-L(copy_page_by_rep_exit):
- POP (%esi)
- POP (%edi)
-#ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(bk_write_44bytes):
- movl 40(%eax), %ecx
- movl %ecx, 40(%edx)
-L(bk_write_40bytes):
- movl 36(%eax), %ecx
- movl %ecx, 36(%edx)
-L(bk_write_36bytes):
- movl 32(%eax), %ecx
- movl %ecx, 32(%edx)
-L(bk_write_32bytes):
- movl 28(%eax), %ecx
- movl %ecx, 28(%edx)
-L(bk_write_28bytes):
- movl 24(%eax), %ecx
- movl %ecx, 24(%edx)
-L(bk_write_24bytes):
- movl 20(%eax), %ecx
- movl %ecx, 20(%edx)
-L(bk_write_20bytes):
- movl 16(%eax), %ecx
- movl %ecx, 16(%edx)
-L(bk_write_16bytes):
- movl 12(%eax), %ecx
- movl %ecx, 12(%edx)
-L(bk_write_12bytes):
- movl 8(%eax), %ecx
- movl %ecx, 8(%edx)
-L(bk_write_8bytes):
- movl 4(%eax), %ecx
- movl %ecx, 4(%edx)
-L(bk_write_4bytes):
- movl (%eax), %ecx
- movl %ecx, (%edx)
-L(bk_write_0bytes):
-#ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(bk_write_45bytes):
- movl 41(%eax), %ecx
- movl %ecx, 41(%edx)
-L(bk_write_41bytes):
- movl 37(%eax), %ecx
- movl %ecx, 37(%edx)
-L(bk_write_37bytes):
- movl 33(%eax), %ecx
- movl %ecx, 33(%edx)
-L(bk_write_33bytes):
- movl 29(%eax), %ecx
- movl %ecx, 29(%edx)
-L(bk_write_29bytes):
- movl 25(%eax), %ecx
- movl %ecx, 25(%edx)
-L(bk_write_25bytes):
- movl 21(%eax), %ecx
- movl %ecx, 21(%edx)
-L(bk_write_21bytes):
- movl 17(%eax), %ecx
- movl %ecx, 17(%edx)
-L(bk_write_17bytes):
- movl 13(%eax), %ecx
- movl %ecx, 13(%edx)
-L(bk_write_13bytes):
- movl 9(%eax), %ecx
- movl %ecx, 9(%edx)
-L(bk_write_9bytes):
- movl 5(%eax), %ecx
- movl %ecx, 5(%edx)
-L(bk_write_5bytes):
- movl 1(%eax), %ecx
- movl %ecx, 1(%edx)
-L(bk_write_1bytes):
- movzbl (%eax), %ecx
- movb %cl, (%edx)
-#ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(bk_write_46bytes):
- movl 42(%eax), %ecx
- movl %ecx, 42(%edx)
-L(bk_write_42bytes):
- movl 38(%eax), %ecx
- movl %ecx, 38(%edx)
-L(bk_write_38bytes):
- movl 34(%eax), %ecx
- movl %ecx, 34(%edx)
-L(bk_write_34bytes):
- movl 30(%eax), %ecx
- movl %ecx, 30(%edx)
-L(bk_write_30bytes):
- movl 26(%eax), %ecx
- movl %ecx, 26(%edx)
-L(bk_write_26bytes):
- movl 22(%eax), %ecx
- movl %ecx, 22(%edx)
-L(bk_write_22bytes):
- movl 18(%eax), %ecx
- movl %ecx, 18(%edx)
-L(bk_write_18bytes):
- movl 14(%eax), %ecx
- movl %ecx, 14(%edx)
-L(bk_write_14bytes):
- movl 10(%eax), %ecx
- movl %ecx, 10(%edx)
-L(bk_write_10bytes):
- movl 6(%eax), %ecx
- movl %ecx, 6(%edx)
-L(bk_write_6bytes):
- movl 2(%eax), %ecx
- movl %ecx, 2(%edx)
-L(bk_write_2bytes):
- movzwl (%eax), %ecx
- movw %cx, (%edx)
-#ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-#endif
- RETURN
-
- ALIGN (4)
-L(bk_write_47bytes):
- movl 43(%eax), %ecx
- movl %ecx, 43(%edx)
-L(bk_write_43bytes):
- movl 39(%eax), %ecx
- movl %ecx, 39(%edx)
-L(bk_write_39bytes):
- movl 35(%eax), %ecx
- movl %ecx, 35(%edx)
-L(bk_write_35bytes):
- movl 31(%eax), %ecx
- movl %ecx, 31(%edx)
-L(bk_write_31bytes):
- movl 27(%eax), %ecx
- movl %ecx, 27(%edx)
-L(bk_write_27bytes):
- movl 23(%eax), %ecx
- movl %ecx, 23(%edx)
-L(bk_write_23bytes):
- movl 19(%eax), %ecx
- movl %ecx, 19(%edx)
-L(bk_write_19bytes):
- movl 15(%eax), %ecx
- movl %ecx, 15(%edx)
-L(bk_write_15bytes):
- movl 11(%eax), %ecx
- movl %ecx, 11(%edx)
-L(bk_write_11bytes):
- movl 7(%eax), %ecx
- movl %ecx, 7(%edx)
-L(bk_write_7bytes):
- movl 3(%eax), %ecx
- movl %ecx, 3(%edx)
-L(bk_write_3bytes):
- movzwl 1(%eax), %ecx
- movw %cx, 1(%edx)
- movzbl (%eax), %eax
- movb %al, (%edx)
-#ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-#endif
- RETURN_END
-
-
- .pushsection .rodata.ssse3,"a",@progbits
- ALIGN (2)
-L(table_48bytes_fwd):
- .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
-
- ALIGN (2)
-L(shl_table):
- .int JMPTBL (L(shl_0), L(shl_table))
- .int JMPTBL (L(shl_1), L(shl_table))
- .int JMPTBL (L(shl_2), L(shl_table))
- .int JMPTBL (L(shl_3), L(shl_table))
- .int JMPTBL (L(shl_4), L(shl_table))
- .int JMPTBL (L(shl_5), L(shl_table))
- .int JMPTBL (L(shl_6), L(shl_table))
- .int JMPTBL (L(shl_7), L(shl_table))
- .int JMPTBL (L(shl_8), L(shl_table))
- .int JMPTBL (L(shl_9), L(shl_table))
- .int JMPTBL (L(shl_10), L(shl_table))
- .int JMPTBL (L(shl_11), L(shl_table))
- .int JMPTBL (L(shl_12), L(shl_table))
- .int JMPTBL (L(shl_13), L(shl_table))
- .int JMPTBL (L(shl_14), L(shl_table))
- .int JMPTBL (L(shl_15), L(shl_table))
-
- ALIGN (2)
-L(table_48_bytes_bwd):
- .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
-
- .popsection
-
-#ifdef USE_AS_MEMMOVE
- ALIGN (4)
-L(copy_backward):
- PUSH (%esi)
- movl %eax, %esi
- add %ecx, %edx
- add %ecx, %esi
- testl $0x3, %edx
- jnz L(bk_align)
-
-L(bk_aligned_4):
- cmp $64, %ecx
- jae L(bk_write_more64bytes)
-
-L(bk_write_64bytesless):
- cmp $32, %ecx
- jb L(bk_write_less32bytes)
-
-L(bk_write_more32bytes):
- /* Copy 32 bytes at a time. */
- sub $32, %ecx
- movl -4(%esi), %eax
- movl %eax, -4(%edx)
- movl -8(%esi), %eax
- movl %eax, -8(%edx)
- movl -12(%esi), %eax
- movl %eax, -12(%edx)
- movl -16(%esi), %eax
- movl %eax, -16(%edx)
- movl -20(%esi), %eax
- movl %eax, -20(%edx)
- movl -24(%esi), %eax
- movl %eax, -24(%edx)
- movl -28(%esi), %eax
- movl %eax, -28(%edx)
- movl -32(%esi), %eax
- movl %eax, -32(%edx)
- sub $32, %edx
- sub $32, %esi
-
-L(bk_write_less32bytes):
- movl %esi, %eax
- sub %ecx, %edx
- sub %ecx, %eax
- POP (%esi)
-L(bk_write_less48bytes):
- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-
- CFI_PUSH (%esi)
- ALIGN (4)
-L(bk_align):
- cmp $8, %ecx
- jbe L(bk_write_less32bytes)
- testl $1, %edx
- /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
- then (EDX & 2) must be != 0. */
- jz L(bk_got2)
- sub $1, %esi
- sub $1, %ecx
- sub $1, %edx
- movzbl (%esi), %eax
- movb %al, (%edx)
-
- testl $2, %edx
- jz L(bk_aligned_4)
-
-L(bk_got2):
- sub $2, %esi
- sub $2, %ecx
- sub $2, %edx
- movzwl (%esi), %eax
- movw %ax, (%edx)
- jmp L(bk_aligned_4)
-
- ALIGN (4)
-L(bk_write_more64bytes):
- /* Check alignment of last byte. */
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
-/* EDX is aligned 4 bytes, but not 16 bytes. */
-L(bk_ssse3_align):
- sub $4, %esi
- sub $4, %ecx
- sub $4, %edx
- movl (%esi), %eax
- movl %eax, (%edx)
-
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
- sub $4, %esi
- sub $4, %ecx
- sub $4, %edx
- movl (%esi), %eax
- movl %eax, (%edx)
-
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
- sub $4, %esi
- sub $4, %ecx
- sub $4, %edx
- movl (%esi), %eax
- movl %eax, (%edx)
-
-L(bk_ssse3_cpy_pre):
- cmp $64, %ecx
- jb L(bk_write_more32bytes)
-
-L(bk_ssse3_cpy):
- sub $64, %esi
- sub $64, %ecx
- sub $64, %edx
- movdqu 0x30(%esi), %xmm3
- movdqa %xmm3, 0x30(%edx)
- movdqu 0x20(%esi), %xmm2
- movdqa %xmm2, 0x20(%edx)
- movdqu 0x10(%esi), %xmm1
- movdqa %xmm1, 0x10(%edx)
- movdqu (%esi), %xmm0
- movdqa %xmm0, (%edx)
- cmp $64, %ecx
- jae L(bk_ssse3_cpy)
- jmp L(bk_write_64bytesless)
-
-#endif
-
-END (MEMCPY)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
deleted file mode 100644
index 53e8a6ca1d..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
+++ /dev/null
@@ -1,3162 +0,0 @@
-/* memcpy with SSSE3
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
-
-# include <sysdep.h>
-# include "asm-syntax.h"
-
-# ifndef MEMCPY
-# define MEMCPY __memcpy_ssse3
-# define MEMCPY_CHK __memcpy_chk_ssse3
-# endif
-
-# ifdef USE_AS_BCOPY
-# define SRC PARMS
-# define DEST SRC+4
-# define LEN DEST+4
-# else
-# define DEST PARMS
-# define SRC DEST+4
-# define LEN SRC+4
-# endif
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifdef SHARED
-# define PARMS 8 /* Preserve EBX. */
-# define ENTRANCE PUSH (%ebx);
-# define RETURN_END POP (%ebx); ret
-# define RETURN RETURN_END; CFI_PUSH (%ebx)
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. INDEX is a register contains the
- index into the jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- /* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ebx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ebx, INDEX, SCALE), %ebx; \
- /* We loaded the jump table. Go. */ \
- jmp *%ebx
-# else
-
-# define PARMS 4
-# define ENTRANCE
-# define RETURN_END ret
-# define RETURN RETURN_END
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute offsets. INDEX is a register contains the index into the
- jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(, INDEX, SCALE)
-# endif
-
- .section .text.ssse3,"ax",@progbits
-# if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMCPY_CHK)
-# endif
-ENTRY (MEMCPY)
- ENTRANCE
- movl LEN(%esp), %ecx
- movl SRC(%esp), %eax
- movl DEST(%esp), %edx
-
-# ifdef USE_AS_MEMMOVE
- cmp %eax, %edx
- jb L(copy_forward)
- je L(fwd_write_0bytes)
- cmp $32, %ecx
- jae L(memmove_bwd)
- jmp L(bk_write_less32bytes_2)
-
- .p2align 4
-L(memmove_bwd):
- add %ecx, %eax
- cmp %eax, %edx
- movl SRC(%esp), %eax
- jb L(copy_backward)
-
-L(copy_forward):
-# endif
- cmp $48, %ecx
- jae L(48bytesormore)
-
-L(fwd_write_less32bytes):
-# ifndef USE_AS_MEMMOVE
- cmp %dl, %al
- jb L(bk_write)
-# endif
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-# ifndef USE_AS_MEMMOVE
- .p2align 4
-L(bk_write):
- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-# endif
-
- .p2align 4
-L(48bytesormore):
-# ifndef USE_AS_MEMMOVE
- movlpd (%eax), %xmm0
- movlpd 8(%eax), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 8(%edx)
-# else
- movdqu (%eax), %xmm0
-# endif
- PUSH (%edi)
- movl %edx, %edi
- and $-16, %edx
- add $16, %edx
- sub %edx, %edi
- add %edi, %ecx
- sub %edi, %eax
-
-# ifdef SHARED_CACHE_SIZE_HALF
- cmp $SHARED_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_shared_cache_size_half, %ecx
-# endif
-# endif
-
- mov %eax, %edi
- jae L(large_page)
- and $0xf, %edi
- jz L(shl_0)
- BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
-
- .p2align 4
-L(shl_0):
-# ifdef USE_AS_MEMMOVE
- movl DEST+4(%esp), %edi
- movdqu %xmm0, (%edi)
-# endif
- xor %edi, %edi
- cmp $127, %ecx
- ja L(shl_0_gobble)
- lea -32(%ecx), %ecx
-
- .p2align 4
-L(shl_0_loop):
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
-
-L(shl_0_end):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- add %edi, %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_0_gobble):
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- POP (%edi)
- lea -128(%ecx), %ecx
- jae L(shl_0_gobble_mem_loop)
-
- .p2align 4
-L(shl_0_gobble_cache_loop):
- movdqa (%eax), %xmm0
- movdqa 0x10(%eax), %xmm1
- movdqa 0x20(%eax), %xmm2
- movdqa 0x30(%eax), %xmm3
- movdqa 0x40(%eax), %xmm4
- movdqa 0x50(%eax), %xmm5
- movdqa 0x60(%eax), %xmm6
- movdqa 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- movdqa %xmm2, 0x20(%edx)
- movdqa %xmm3, 0x30(%edx)
- movdqa %xmm4, 0x40(%edx)
- movdqa %xmm5, 0x50(%edx)
- movdqa %xmm6, 0x60(%edx)
- movdqa %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
-
- jae L(shl_0_gobble_cache_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_cache_less_64bytes)
-
- movdqa (%eax), %xmm0
- sub $0x40, %ecx
- movdqa 0x10(%eax), %xmm1
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- movdqa 0x20(%eax), %xmm0
- movdqa 0x30(%eax), %xmm1
- add $0x40, %eax
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm1, 0x30(%edx)
- add $0x40, %edx
-
-L(shl_0_cache_less_64bytes):
- cmp $0x20, %ecx
- jb L(shl_0_cache_less_32bytes)
- movdqa (%eax), %xmm0
- sub $0x20, %ecx
- movdqa 0x10(%eax), %xmm1
- add $0x20, %eax
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- add $0x20, %edx
-
-L(shl_0_cache_less_32bytes):
- cmp $0x10, %ecx
- jb L(shl_0_cache_less_16bytes)
- sub $0x10, %ecx
- movdqa (%eax), %xmm0
- add $0x10, %eax
- movdqa %xmm0, (%edx)
- add $0x10, %edx
-
-L(shl_0_cache_less_16bytes):
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- .p2align 4
-L(shl_0_gobble_mem_loop):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x280(%eax)
- prefetcht0 0x1c0(%edx)
-
- movdqa (%eax), %xmm0
- movdqa 0x10(%eax), %xmm1
- movdqa 0x20(%eax), %xmm2
- movdqa 0x30(%eax), %xmm3
- movdqa 0x40(%eax), %xmm4
- movdqa 0x50(%eax), %xmm5
- movdqa 0x60(%eax), %xmm6
- movdqa 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
- sub $0x80, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- movdqa %xmm2, 0x20(%edx)
- movdqa %xmm3, 0x30(%edx)
- movdqa %xmm4, 0x40(%edx)
- movdqa %xmm5, 0x50(%edx)
- movdqa %xmm6, 0x60(%edx)
- movdqa %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
-
- jae L(shl_0_gobble_mem_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_mem_less_64bytes)
-
- movdqa (%eax), %xmm0
- sub $0x40, %ecx
- movdqa 0x10(%eax), %xmm1
-
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
-
- movdqa 0x20(%eax), %xmm0
- movdqa 0x30(%eax), %xmm1
- add $0x40, %eax
-
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm1, 0x30(%edx)
- add $0x40, %edx
-
-L(shl_0_mem_less_64bytes):
- cmp $0x20, %ecx
- jb L(shl_0_mem_less_32bytes)
- movdqa (%eax), %xmm0
- sub $0x20, %ecx
- movdqa 0x10(%eax), %xmm1
- add $0x20, %eax
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- add $0x20, %edx
-
-L(shl_0_mem_less_32bytes):
- cmp $0x10, %ecx
- jb L(shl_0_mem_less_16bytes)
- sub $0x10, %ecx
- movdqa (%eax), %xmm0
- add $0x10, %eax
- movdqa %xmm0, (%edx)
- add $0x10, %edx
-
-L(shl_0_mem_less_16bytes):
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
- .p2align 4
-L(shl_1):
-# ifndef USE_AS_MEMMOVE
- movaps -1(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -1(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_1_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl1LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 15(%eax), %xmm2
- movaps 31(%eax), %xmm3
- movaps 47(%eax), %xmm4
- movaps 63(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $1, %xmm4, %xmm5
- palignr $1, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $1, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $1, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl1LoopStart)
-
-L(Shl1LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 15(%eax), %xmm2
- movaps 31(%eax), %xmm3
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_1_no_prefetch):
- lea -32(%ecx), %ecx
- lea -1(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_1_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_1_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_1_no_prefetch_loop)
-
-L(sh_1_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 1(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_2):
-# ifndef USE_AS_MEMMOVE
- movaps -2(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -2(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_2_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl2LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 14(%eax), %xmm2
- movaps 30(%eax), %xmm3
- movaps 46(%eax), %xmm4
- movaps 62(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $2, %xmm4, %xmm5
- palignr $2, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $2, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $2, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl2LoopStart)
-
-L(Shl2LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 14(%eax), %xmm2
- movaps 30(%eax), %xmm3
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_2_no_prefetch):
- lea -32(%ecx), %ecx
- lea -2(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_2_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_2_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_2_no_prefetch_loop)
-
-L(sh_2_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 2(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_3):
-# ifndef USE_AS_MEMMOVE
- movaps -3(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -3(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_3_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl3LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 13(%eax), %xmm2
- movaps 29(%eax), %xmm3
- movaps 45(%eax), %xmm4
- movaps 61(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $3, %xmm4, %xmm5
- palignr $3, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $3, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $3, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl3LoopStart)
-
-L(Shl3LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 13(%eax), %xmm2
- movaps 29(%eax), %xmm3
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_3_no_prefetch):
- lea -32(%ecx), %ecx
- lea -3(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_3_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_3_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_3_no_prefetch_loop)
-
-L(sh_3_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 3(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_4):
-# ifndef USE_AS_MEMMOVE
- movaps -4(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -4(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_4_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl4LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 12(%eax), %xmm2
- movaps 28(%eax), %xmm3
- movaps 44(%eax), %xmm4
- movaps 60(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $4, %xmm4, %xmm5
- palignr $4, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $4, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $4, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl4LoopStart)
-
-L(Shl4LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 12(%eax), %xmm2
- movaps 28(%eax), %xmm3
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_4_no_prefetch):
- lea -32(%ecx), %ecx
- lea -4(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_4_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_4_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_4_no_prefetch_loop)
-
-L(sh_4_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 4(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_5):
-# ifndef USE_AS_MEMMOVE
- movaps -5(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -5(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_5_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl5LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 11(%eax), %xmm2
- movaps 27(%eax), %xmm3
- movaps 43(%eax), %xmm4
- movaps 59(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $5, %xmm4, %xmm5
- palignr $5, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $5, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $5, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl5LoopStart)
-
-L(Shl5LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 11(%eax), %xmm2
- movaps 27(%eax), %xmm3
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_5_no_prefetch):
- lea -32(%ecx), %ecx
- lea -5(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_5_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_5_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_5_no_prefetch_loop)
-
-L(sh_5_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 5(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_6):
-# ifndef USE_AS_MEMMOVE
- movaps -6(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -6(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_6_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl6LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 10(%eax), %xmm2
- movaps 26(%eax), %xmm3
- movaps 42(%eax), %xmm4
- movaps 58(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $6, %xmm4, %xmm5
- palignr $6, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $6, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $6, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl6LoopStart)
-
-L(Shl6LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 10(%eax), %xmm2
- movaps 26(%eax), %xmm3
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_6_no_prefetch):
- lea -32(%ecx), %ecx
- lea -6(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_6_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_6_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_6_no_prefetch_loop)
-
-L(sh_6_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 6(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_7):
-# ifndef USE_AS_MEMMOVE
- movaps -7(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -7(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_7_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl7LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 9(%eax), %xmm2
- movaps 25(%eax), %xmm3
- movaps 41(%eax), %xmm4
- movaps 57(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $7, %xmm4, %xmm5
- palignr $7, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $7, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $7, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl7LoopStart)
-
-L(Shl7LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 9(%eax), %xmm2
- movaps 25(%eax), %xmm3
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_7_no_prefetch):
- lea -32(%ecx), %ecx
- lea -7(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_7_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_7_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_7_no_prefetch_loop)
-
-L(sh_7_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 7(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_8):
-# ifndef USE_AS_MEMMOVE
- movaps -8(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -8(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_8_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl8LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 8(%eax), %xmm2
- movaps 24(%eax), %xmm3
- movaps 40(%eax), %xmm4
- movaps 56(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $8, %xmm4, %xmm5
- palignr $8, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $8, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $8, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl8LoopStart)
-
-L(LoopLeave8):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 8(%eax), %xmm2
- movaps 24(%eax), %xmm3
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_8_no_prefetch):
- lea -32(%ecx), %ecx
- lea -8(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_8_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_8_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_8_no_prefetch_loop)
-
-L(sh_8_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 8(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_9):
-# ifndef USE_AS_MEMMOVE
- movaps -9(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -9(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_9_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl9LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 7(%eax), %xmm2
- movaps 23(%eax), %xmm3
- movaps 39(%eax), %xmm4
- movaps 55(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $9, %xmm4, %xmm5
- palignr $9, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $9, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $9, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl9LoopStart)
-
-L(Shl9LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 7(%eax), %xmm2
- movaps 23(%eax), %xmm3
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_9_no_prefetch):
- lea -32(%ecx), %ecx
- lea -9(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_9_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_9_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_9_no_prefetch_loop)
-
-L(sh_9_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 9(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_10):
-# ifndef USE_AS_MEMMOVE
- movaps -10(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -10(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_10_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl10LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 6(%eax), %xmm2
- movaps 22(%eax), %xmm3
- movaps 38(%eax), %xmm4
- movaps 54(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $10, %xmm4, %xmm5
- palignr $10, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $10, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $10, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl10LoopStart)
-
-L(Shl10LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 6(%eax), %xmm2
- movaps 22(%eax), %xmm3
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_10_no_prefetch):
- lea -32(%ecx), %ecx
- lea -10(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_10_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_10_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_10_no_prefetch_loop)
-
-L(sh_10_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 10(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_11):
-# ifndef USE_AS_MEMMOVE
- movaps -11(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -11(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_11_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl11LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 5(%eax), %xmm2
- movaps 21(%eax), %xmm3
- movaps 37(%eax), %xmm4
- movaps 53(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $11, %xmm4, %xmm5
- palignr $11, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $11, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $11, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl11LoopStart)
-
-L(Shl11LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 5(%eax), %xmm2
- movaps 21(%eax), %xmm3
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_11_no_prefetch):
- lea -32(%ecx), %ecx
- lea -11(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_11_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_11_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_11_no_prefetch_loop)
-
-L(sh_11_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 11(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_12):
-# ifndef USE_AS_MEMMOVE
- movaps -12(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -12(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_12_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl12LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 4(%eax), %xmm2
- movaps 20(%eax), %xmm3
- movaps 36(%eax), %xmm4
- movaps 52(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $12, %xmm4, %xmm5
- palignr $12, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $12, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $12, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl12LoopStart)
-
-L(Shl12LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 4(%eax), %xmm2
- movaps 20(%eax), %xmm3
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_12_no_prefetch):
- lea -32(%ecx), %ecx
- lea -12(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_12_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_12_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_12_no_prefetch_loop)
-
-L(sh_12_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 12(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_13):
-# ifndef USE_AS_MEMMOVE
- movaps -13(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -13(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_13_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl13LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 3(%eax), %xmm2
- movaps 19(%eax), %xmm3
- movaps 35(%eax), %xmm4
- movaps 51(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $13, %xmm4, %xmm5
- palignr $13, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $13, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $13, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl13LoopStart)
-
-L(Shl13LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 3(%eax), %xmm2
- movaps 19(%eax), %xmm3
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_13_no_prefetch):
- lea -32(%ecx), %ecx
- lea -13(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_13_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_13_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_13_no_prefetch_loop)
-
-L(sh_13_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 13(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_14):
-# ifndef USE_AS_MEMMOVE
- movaps -14(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -14(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_14_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl14LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 2(%eax), %xmm2
- movaps 18(%eax), %xmm3
- movaps 34(%eax), %xmm4
- movaps 50(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $14, %xmm4, %xmm5
- palignr $14, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $14, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $14, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl14LoopStart)
-
-L(Shl14LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 2(%eax), %xmm2
- movaps 18(%eax), %xmm3
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_14_no_prefetch):
- lea -32(%ecx), %ecx
- lea -14(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_14_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_14_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_14_no_prefetch_loop)
-
-L(sh_14_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 14(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_15):
-# ifndef USE_AS_MEMMOVE
- movaps -15(%eax), %xmm1
-# else
- movl DEST+4(%esp), %edi
- movaps -15(%eax), %xmm1
- movdqu %xmm0, (%edi)
-# endif
-# ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-# else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-# endif
- jb L(sh_15_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl15LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 1(%eax), %xmm2
- movaps 17(%eax), %xmm3
- movaps 33(%eax), %xmm4
- movaps 49(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $15, %xmm4, %xmm5
- palignr $15, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $15, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $15, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl15LoopStart)
-
-L(Shl15LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 1(%eax), %xmm2
- movaps 17(%eax), %xmm3
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_15_no_prefetch):
- lea -32(%ecx), %ecx
- lea -15(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_15_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_15_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_15_no_prefetch_loop)
-
-L(sh_15_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 15(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_end_0):
- lea 32(%ecx), %ecx
- lea (%edx, %ecx), %edx
- lea (%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- .p2align 4
-L(fwd_write_44bytes):
- movq -44(%eax), %xmm0
- movq %xmm0, -44(%edx)
-L(fwd_write_36bytes):
- movq -36(%eax), %xmm0
- movq %xmm0, -36(%edx)
-L(fwd_write_28bytes):
- movq -28(%eax), %xmm0
- movq %xmm0, -28(%edx)
-L(fwd_write_20bytes):
- movq -20(%eax), %xmm0
- movq %xmm0, -20(%edx)
-L(fwd_write_12bytes):
- movq -12(%eax), %xmm0
- movq %xmm0, -12(%edx)
-L(fwd_write_4bytes):
- movl -4(%eax), %ecx
- movl %ecx, -4(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_40bytes):
- movq -40(%eax), %xmm0
- movq %xmm0, -40(%edx)
-L(fwd_write_32bytes):
- movq -32(%eax), %xmm0
- movq %xmm0, -32(%edx)
-L(fwd_write_24bytes):
- movq -24(%eax), %xmm0
- movq %xmm0, -24(%edx)
-L(fwd_write_16bytes):
- movq -16(%eax), %xmm0
- movq %xmm0, -16(%edx)
-L(fwd_write_8bytes):
- movq -8(%eax), %xmm0
- movq %xmm0, -8(%edx)
-L(fwd_write_0bytes):
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_5bytes):
- movl -5(%eax), %ecx
- movl -4(%eax), %eax
- movl %ecx, -5(%edx)
- movl %eax, -4(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_45bytes):
- movq -45(%eax), %xmm0
- movq %xmm0, -45(%edx)
-L(fwd_write_37bytes):
- movq -37(%eax), %xmm0
- movq %xmm0, -37(%edx)
-L(fwd_write_29bytes):
- movq -29(%eax), %xmm0
- movq %xmm0, -29(%edx)
-L(fwd_write_21bytes):
- movq -21(%eax), %xmm0
- movq %xmm0, -21(%edx)
-L(fwd_write_13bytes):
- movq -13(%eax), %xmm0
- movq %xmm0, -13(%edx)
- movl -5(%eax), %ecx
- movl %ecx, -5(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_41bytes):
- movq -41(%eax), %xmm0
- movq %xmm0, -41(%edx)
-L(fwd_write_33bytes):
- movq -33(%eax), %xmm0
- movq %xmm0, -33(%edx)
-L(fwd_write_25bytes):
- movq -25(%eax), %xmm0
- movq %xmm0, -25(%edx)
-L(fwd_write_17bytes):
- movq -17(%eax), %xmm0
- movq %xmm0, -17(%edx)
-L(fwd_write_9bytes):
- movq -9(%eax), %xmm0
- movq %xmm0, -9(%edx)
-L(fwd_write_1bytes):
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_46bytes):
- movq -46(%eax), %xmm0
- movq %xmm0, -46(%edx)
-L(fwd_write_38bytes):
- movq -38(%eax), %xmm0
- movq %xmm0, -38(%edx)
-L(fwd_write_30bytes):
- movq -30(%eax), %xmm0
- movq %xmm0, -30(%edx)
-L(fwd_write_22bytes):
- movq -22(%eax), %xmm0
- movq %xmm0, -22(%edx)
-L(fwd_write_14bytes):
- movq -14(%eax), %xmm0
- movq %xmm0, -14(%edx)
-L(fwd_write_6bytes):
- movl -6(%eax), %ecx
- movl %ecx, -6(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_42bytes):
- movq -42(%eax), %xmm0
- movq %xmm0, -42(%edx)
-L(fwd_write_34bytes):
- movq -34(%eax), %xmm0
- movq %xmm0, -34(%edx)
-L(fwd_write_26bytes):
- movq -26(%eax), %xmm0
- movq %xmm0, -26(%edx)
-L(fwd_write_18bytes):
- movq -18(%eax), %xmm0
- movq %xmm0, -18(%edx)
-L(fwd_write_10bytes):
- movq -10(%eax), %xmm0
- movq %xmm0, -10(%edx)
-L(fwd_write_2bytes):
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_47bytes):
- movq -47(%eax), %xmm0
- movq %xmm0, -47(%edx)
-L(fwd_write_39bytes):
- movq -39(%eax), %xmm0
- movq %xmm0, -39(%edx)
-L(fwd_write_31bytes):
- movq -31(%eax), %xmm0
- movq %xmm0, -31(%edx)
-L(fwd_write_23bytes):
- movq -23(%eax), %xmm0
- movq %xmm0, -23(%edx)
-L(fwd_write_15bytes):
- movq -15(%eax), %xmm0
- movq %xmm0, -15(%edx)
-L(fwd_write_7bytes):
- movl -7(%eax), %ecx
- movl %ecx, -7(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_43bytes):
- movq -43(%eax), %xmm0
- movq %xmm0, -43(%edx)
-L(fwd_write_35bytes):
- movq -35(%eax), %xmm0
- movq %xmm0, -35(%edx)
-L(fwd_write_27bytes):
- movq -27(%eax), %xmm0
- movq %xmm0, -27(%edx)
-L(fwd_write_19bytes):
- movq -19(%eax), %xmm0
- movq %xmm0, -19(%edx)
-L(fwd_write_11bytes):
- movq -11(%eax), %xmm0
- movq %xmm0, -11(%edx)
-L(fwd_write_3bytes):
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_40bytes_align):
- movdqa -40(%eax), %xmm0
- movdqa %xmm0, -40(%edx)
-L(fwd_write_24bytes_align):
- movdqa -24(%eax), %xmm0
- movdqa %xmm0, -24(%edx)
-L(fwd_write_8bytes_align):
- movq -8(%eax), %xmm0
- movq %xmm0, -8(%edx)
-L(fwd_write_0bytes_align):
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_32bytes_align):
- movdqa -32(%eax), %xmm0
- movdqa %xmm0, -32(%edx)
-L(fwd_write_16bytes_align):
- movdqa -16(%eax), %xmm0
- movdqa %xmm0, -16(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_5bytes_align):
- movl -5(%eax), %ecx
- movl -4(%eax), %eax
- movl %ecx, -5(%edx)
- movl %eax, -4(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_45bytes_align):
- movdqa -45(%eax), %xmm0
- movdqa %xmm0, -45(%edx)
-L(fwd_write_29bytes_align):
- movdqa -29(%eax), %xmm0
- movdqa %xmm0, -29(%edx)
-L(fwd_write_13bytes_align):
- movq -13(%eax), %xmm0
- movq %xmm0, -13(%edx)
- movl -5(%eax), %ecx
- movl %ecx, -5(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_37bytes_align):
- movdqa -37(%eax), %xmm0
- movdqa %xmm0, -37(%edx)
-L(fwd_write_21bytes_align):
- movdqa -21(%eax), %xmm0
- movdqa %xmm0, -21(%edx)
- movl -5(%eax), %ecx
- movl %ecx, -5(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_41bytes_align):
- movdqa -41(%eax), %xmm0
- movdqa %xmm0, -41(%edx)
-L(fwd_write_25bytes_align):
- movdqa -25(%eax), %xmm0
- movdqa %xmm0, -25(%edx)
-L(fwd_write_9bytes_align):
- movq -9(%eax), %xmm0
- movq %xmm0, -9(%edx)
-L(fwd_write_1bytes_align):
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_33bytes_align):
- movdqa -33(%eax), %xmm0
- movdqa %xmm0, -33(%edx)
-L(fwd_write_17bytes_align):
- movdqa -17(%eax), %xmm0
- movdqa %xmm0, -17(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_46bytes_align):
- movdqa -46(%eax), %xmm0
- movdqa %xmm0, -46(%edx)
-L(fwd_write_30bytes_align):
- movdqa -30(%eax), %xmm0
- movdqa %xmm0, -30(%edx)
-L(fwd_write_14bytes_align):
- movq -14(%eax), %xmm0
- movq %xmm0, -14(%edx)
-L(fwd_write_6bytes_align):
- movl -6(%eax), %ecx
- movl %ecx, -6(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_38bytes_align):
- movdqa -38(%eax), %xmm0
- movdqa %xmm0, -38(%edx)
-L(fwd_write_22bytes_align):
- movdqa -22(%eax), %xmm0
- movdqa %xmm0, -22(%edx)
- movl -6(%eax), %ecx
- movl %ecx, -6(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_42bytes_align):
- movdqa -42(%eax), %xmm0
- movdqa %xmm0, -42(%edx)
-L(fwd_write_26bytes_align):
- movdqa -26(%eax), %xmm0
- movdqa %xmm0, -26(%edx)
-L(fwd_write_10bytes_align):
- movq -10(%eax), %xmm0
- movq %xmm0, -10(%edx)
-L(fwd_write_2bytes_align):
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_34bytes_align):
- movdqa -34(%eax), %xmm0
- movdqa %xmm0, -34(%edx)
-L(fwd_write_18bytes_align):
- movdqa -18(%eax), %xmm0
- movdqa %xmm0, -18(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_47bytes_align):
- movdqa -47(%eax), %xmm0
- movdqa %xmm0, -47(%edx)
-L(fwd_write_31bytes_align):
- movdqa -31(%eax), %xmm0
- movdqa %xmm0, -31(%edx)
-L(fwd_write_15bytes_align):
- movq -15(%eax), %xmm0
- movq %xmm0, -15(%edx)
-L(fwd_write_7bytes_align):
- movl -7(%eax), %ecx
- movl %ecx, -7(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_39bytes_align):
- movdqa -39(%eax), %xmm0
- movdqa %xmm0, -39(%edx)
-L(fwd_write_23bytes_align):
- movdqa -23(%eax), %xmm0
- movdqa %xmm0, -23(%edx)
- movl -7(%eax), %ecx
- movl %ecx, -7(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_43bytes_align):
- movdqa -43(%eax), %xmm0
- movdqa %xmm0, -43(%edx)
-L(fwd_write_27bytes_align):
- movdqa -27(%eax), %xmm0
- movdqa %xmm0, -27(%edx)
-L(fwd_write_11bytes_align):
- movq -11(%eax), %xmm0
- movq %xmm0, -11(%edx)
-L(fwd_write_3bytes_align):
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_35bytes_align):
- movdqa -35(%eax), %xmm0
- movdqa %xmm0, -35(%edx)
-L(fwd_write_19bytes_align):
- movdqa -19(%eax), %xmm0
- movdqa %xmm0, -19(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_44bytes_align):
- movdqa -44(%eax), %xmm0
- movdqa %xmm0, -44(%edx)
-L(fwd_write_28bytes_align):
- movdqa -28(%eax), %xmm0
- movdqa %xmm0, -28(%edx)
-L(fwd_write_12bytes_align):
- movq -12(%eax), %xmm0
- movq %xmm0, -12(%edx)
-L(fwd_write_4bytes_align):
- movl -4(%eax), %ecx
- movl %ecx, -4(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(fwd_write_36bytes_align):
- movdqa -36(%eax), %xmm0
- movdqa %xmm0, -36(%edx)
-L(fwd_write_20bytes_align):
- movdqa -20(%eax), %xmm0
- movdqa %xmm0, -20(%edx)
- movl -4(%eax), %ecx
- movl %ecx, -4(%edx)
-# ifndef USE_AS_BCOPY
-# ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-# else
- movl DEST(%esp), %eax
-# endif
-# endif
- RETURN_END
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(large_page):
- movdqu (%eax), %xmm1
-# ifdef USE_AS_MEMMOVE
- movl DEST+4(%esp), %edi
- movdqu %xmm0, (%edi)
-# endif
- lea 16(%eax), %eax
- movntdq %xmm1, (%edx)
- lea 16(%edx), %edx
- lea -0x90(%ecx), %ecx
- POP (%edi)
-
- .p2align 4
-L(large_page_loop):
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- movdqu 0x20(%eax), %xmm2
- movdqu 0x30(%eax), %xmm3
- movdqu 0x40(%eax), %xmm4
- movdqu 0x50(%eax), %xmm5
- movdqu 0x60(%eax), %xmm6
- movdqu 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
-
- sub $0x80, %ecx
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- movntdq %xmm2, 0x20(%edx)
- movntdq %xmm3, 0x30(%edx)
- movntdq %xmm4, 0x40(%edx)
- movntdq %xmm5, 0x50(%edx)
- movntdq %xmm6, 0x60(%edx)
- movntdq %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
- jae L(large_page_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(large_page_less_64bytes)
-
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- movdqu 0x20(%eax), %xmm2
- movdqu 0x30(%eax), %xmm3
- lea 0x40(%eax), %eax
-
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- movntdq %xmm2, 0x20(%edx)
- movntdq %xmm3, 0x30(%edx)
- lea 0x40(%edx), %edx
- sub $0x40, %ecx
-L(large_page_less_64bytes):
- cmp $32, %ecx
- jb L(large_page_less_32bytes)
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- lea 0x20(%eax), %eax
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- lea 0x20(%edx), %edx
- sub $0x20, %ecx
-L(large_page_less_32bytes):
- add %ecx, %edx
- add %ecx, %eax
- sfence
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- .p2align 4
-L(bk_write_44bytes):
- movq 36(%eax), %xmm0
- movq %xmm0, 36(%edx)
-L(bk_write_36bytes):
- movq 28(%eax), %xmm0
- movq %xmm0, 28(%edx)
-L(bk_write_28bytes):
- movq 20(%eax), %xmm0
- movq %xmm0, 20(%edx)
-L(bk_write_20bytes):
- movq 12(%eax), %xmm0
- movq %xmm0, 12(%edx)
-L(bk_write_12bytes):
- movq 4(%eax), %xmm0
- movq %xmm0, 4(%edx)
-L(bk_write_4bytes):
- movl (%eax), %ecx
- movl %ecx, (%edx)
-L(bk_write_0bytes):
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(bk_write_40bytes):
- movq 32(%eax), %xmm0
- movq %xmm0, 32(%edx)
-L(bk_write_32bytes):
- movq 24(%eax), %xmm0
- movq %xmm0, 24(%edx)
-L(bk_write_24bytes):
- movq 16(%eax), %xmm0
- movq %xmm0, 16(%edx)
-L(bk_write_16bytes):
- movq 8(%eax), %xmm0
- movq %xmm0, 8(%edx)
-L(bk_write_8bytes):
- movq (%eax), %xmm0
- movq %xmm0, (%edx)
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(bk_write_45bytes):
- movq 37(%eax), %xmm0
- movq %xmm0, 37(%edx)
-L(bk_write_37bytes):
- movq 29(%eax), %xmm0
- movq %xmm0, 29(%edx)
-L(bk_write_29bytes):
- movq 21(%eax), %xmm0
- movq %xmm0, 21(%edx)
-L(bk_write_21bytes):
- movq 13(%eax), %xmm0
- movq %xmm0, 13(%edx)
-L(bk_write_13bytes):
- movq 5(%eax), %xmm0
- movq %xmm0, 5(%edx)
-L(bk_write_5bytes):
- movl 1(%eax), %ecx
- movl %ecx, 1(%edx)
-L(bk_write_1bytes):
- movzbl (%eax), %ecx
- movb %cl, (%edx)
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(bk_write_41bytes):
- movq 33(%eax), %xmm0
- movq %xmm0, 33(%edx)
-L(bk_write_33bytes):
- movq 25(%eax), %xmm0
- movq %xmm0, 25(%edx)
-L(bk_write_25bytes):
- movq 17(%eax), %xmm0
- movq %xmm0, 17(%edx)
-L(bk_write_17bytes):
- movq 9(%eax), %xmm0
- movq %xmm0, 9(%edx)
-L(bk_write_9bytes):
- movq 1(%eax), %xmm0
- movq %xmm0, 1(%edx)
- movzbl (%eax), %ecx
- movb %cl, (%edx)
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(bk_write_46bytes):
- movq 38(%eax), %xmm0
- movq %xmm0, 38(%edx)
-L(bk_write_38bytes):
- movq 30(%eax), %xmm0
- movq %xmm0, 30(%edx)
-L(bk_write_30bytes):
- movq 22(%eax), %xmm0
- movq %xmm0, 22(%edx)
-L(bk_write_22bytes):
- movq 14(%eax), %xmm0
- movq %xmm0, 14(%edx)
-L(bk_write_14bytes):
- movq 6(%eax), %xmm0
- movq %xmm0, 6(%edx)
-L(bk_write_6bytes):
- movl 2(%eax), %ecx
- movl %ecx, 2(%edx)
- movzwl (%eax), %ecx
- movw %cx, (%edx)
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(bk_write_42bytes):
- movq 34(%eax), %xmm0
- movq %xmm0, 34(%edx)
-L(bk_write_34bytes):
- movq 26(%eax), %xmm0
- movq %xmm0, 26(%edx)
-L(bk_write_26bytes):
- movq 18(%eax), %xmm0
- movq %xmm0, 18(%edx)
-L(bk_write_18bytes):
- movq 10(%eax), %xmm0
- movq %xmm0, 10(%edx)
-L(bk_write_10bytes):
- movq 2(%eax), %xmm0
- movq %xmm0, 2(%edx)
-L(bk_write_2bytes):
- movzwl (%eax), %ecx
- movw %cx, (%edx)
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(bk_write_47bytes):
- movq 39(%eax), %xmm0
- movq %xmm0, 39(%edx)
-L(bk_write_39bytes):
- movq 31(%eax), %xmm0
- movq %xmm0, 31(%edx)
-L(bk_write_31bytes):
- movq 23(%eax), %xmm0
- movq %xmm0, 23(%edx)
-L(bk_write_23bytes):
- movq 15(%eax), %xmm0
- movq %xmm0, 15(%edx)
-L(bk_write_15bytes):
- movq 7(%eax), %xmm0
- movq %xmm0, 7(%edx)
-L(bk_write_7bytes):
- movl 3(%eax), %ecx
- movl %ecx, 3(%edx)
- movzwl 1(%eax), %ecx
- movw %cx, 1(%edx)
- movzbl (%eax), %eax
- movb %al, (%edx)
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(bk_write_43bytes):
- movq 35(%eax), %xmm0
- movq %xmm0, 35(%edx)
-L(bk_write_35bytes):
- movq 27(%eax), %xmm0
- movq %xmm0, 27(%edx)
-L(bk_write_27bytes):
- movq 19(%eax), %xmm0
- movq %xmm0, 19(%edx)
-L(bk_write_19bytes):
- movq 11(%eax), %xmm0
- movq %xmm0, 11(%edx)
-L(bk_write_11bytes):
- movq 3(%eax), %xmm0
- movq %xmm0, 3(%edx)
-L(bk_write_3bytes):
- movzwl 1(%eax), %ecx
- movw %cx, 1(%edx)
- movzbl (%eax), %eax
- movb %al, (%edx)
-# ifndef USE_AS_BCOPY
- movl DEST(%esp), %eax
-# ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-# endif
-# endif
- RETURN_END
-
-
- .pushsection .rodata.ssse3,"a",@progbits
- .p2align 2
-L(table_48bytes_fwd):
- .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
-
- .p2align 2 /* align the table to 2^2 = 4 bytes */
-L(table_48bytes_fwd_align): /* 48 entries: index N (0..47) names L(fwd_write_Nbytes_align); JMPTBL is defined outside this view (presumably a table-relative offset - confirm) */
- .int JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
-
- .p2align 2 /* align the table to 2^2 = 4 bytes */
-L(shl_table): /* 16 entries: index N (0..15) names L(shl_N); the code that indexes this table is outside this view */
- .int JMPTBL (L(shl_0), L(shl_table))
- .int JMPTBL (L(shl_1), L(shl_table))
- .int JMPTBL (L(shl_2), L(shl_table))
- .int JMPTBL (L(shl_3), L(shl_table))
- .int JMPTBL (L(shl_4), L(shl_table))
- .int JMPTBL (L(shl_5), L(shl_table))
- .int JMPTBL (L(shl_6), L(shl_table))
- .int JMPTBL (L(shl_7), L(shl_table))
- .int JMPTBL (L(shl_8), L(shl_table))
- .int JMPTBL (L(shl_9), L(shl_table))
- .int JMPTBL (L(shl_10), L(shl_table))
- .int JMPTBL (L(shl_11), L(shl_table))
- .int JMPTBL (L(shl_12), L(shl_table))
- .int JMPTBL (L(shl_13), L(shl_table))
- .int JMPTBL (L(shl_14), L(shl_table))
- .int JMPTBL (L(shl_15), L(shl_table))
-
- .p2align 2 /* align the table to 2^2 = 4 bytes */
-L(table_48_bytes_bwd): /* 48 entries: index N (0..47) names L(bk_write_Nbytes); indexed by ECX with scale 4 from L(bk_write_less32bytes_2) */
- .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
-
- .popsection
-
-# ifdef USE_AS_MEMMOVE /* this backward-copy path is assembled only when USE_AS_MEMMOVE is defined */
- .p2align 4
-L(copy_backward): /* copies ECX bytes from the highest address downwards; loads go through EDI, stores through EDX */
- PUSH (%edi) /* EDI is used as the load pointer; restored by the POP in L(bk_write_less32bytes) */
- movl %eax, %edi /* EDI = EAX, the base address for loads */
- lea (%ecx,%edx,1),%edx /* EDX += ECX: one past the last byte to store */
- lea (%ecx,%edi,1),%edi /* EDI += ECX: one past the last byte to load */
- testl $0x3, %edx
- jnz L(bk_align) /* store end is not 4-byte aligned: peel bytes first */
-
-L(bk_aligned_4): /* EDX is 4-byte aligned here when reached via L(bk_align) or the fall-through above */
- cmp $64, %ecx
- jae L(bk_write_more64bytes)
-
-L(bk_write_64bytesless): /* fewer than 64 bytes remain */
- cmp $32, %ecx
- jb L(bk_write_less32bytes)
-
-L(bk_write_more32bytes):
- /* Copy one 32-byte block, as four 8-byte moves from the top down. */
- sub $32, %ecx
- movq -8(%edi), %xmm0
- movq %xmm0, -8(%edx)
- movq -16(%edi), %xmm0
- movq %xmm0, -16(%edx)
- movq -24(%edi), %xmm0
- movq %xmm0, -24(%edx)
- movq -32(%edi), %xmm0
- movq %xmm0, -32(%edx)
- sub $32, %edx
- sub $32, %edi
-
-L(bk_write_less32bytes): /* turn the end pointers back into start pointers for the remaining ECX bytes */
- movl %edi, %eax
- sub %ecx, %edx /* EDX = start of the remaining store range */
- sub %ecx, %eax /* EAX = start of the remaining load range */
- POP (%edi)
-L(bk_write_less32bytes_2):
- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) /* dispatch on the remaining byte count in ECX */
-
- CFI_PUSH (%edi) /* the code below runs with EDI still pushed (CFI_PUSH is defined outside this view; presumably unwind info only) */
-
- .p2align 4
-L(bk_align): /* reached when (EDX & 3) != 0 */
- cmp $8, %ecx
- jbe L(bk_write_less32bytes) /* 8 bytes or fewer: skip alignment and go straight to the tail dispatch */
- testl $1, %edx
- /* We get here only if (EDX & 3) != 0, so if (EDX & 1) == 0,
- then (EDX & 2) must be != 0. */
- jz L(bk_got2)
- sub $1, %edi /* copy one byte so that EDX becomes even */
- sub $1, %ecx
- sub $1, %edx
- movzbl (%edi), %eax
- movb %al, (%edx)
-
- testl $2, %edx
- jz L(bk_aligned_4)
-
-L(bk_got2): /* copy two bytes so that EDX becomes a multiple of 4 */
- sub $2, %edi
- sub $2, %ecx
- sub $2, %edx
- movzwl (%edi), %eax
- movw %ax, (%edx)
- jmp L(bk_aligned_4)
-
- .p2align 4
-L(bk_write_more64bytes): /* at least 64 bytes remain */
- /* Check alignment of last byte. */
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
-/* EDX is aligned 4 bytes, but not 16 bytes. */
-L(bk_ssse3_align): /* copy 4 bytes at a time, at most three times, until EDX is 16-byte aligned */
- sub $4, %edi
- sub $4, %ecx
- sub $4, %edx
- movl (%edi), %eax
- movl %eax, (%edx)
-
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
- sub $4, %edi
- sub $4, %ecx
- sub $4, %edx
- movl (%edi), %eax
- movl %eax, (%edx)
-
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
- sub $4, %edi
- sub $4, %ecx
- sub $4, %edx
- movl (%edi), %eax
- movl %eax, (%edx)
-
-L(bk_ssse3_cpy_pre):
- cmp $64, %ecx
- jb L(bk_write_more32bytes) /* the alignment steps may have left fewer than 64 bytes */
-
- .p2align 4
-L(bk_ssse3_cpy): /* main loop: 64 bytes per iteration, unaligned loads (movdqu) and aligned stores (movdqa) */
- sub $64, %edi
- sub $64, %ecx
- sub $64, %edx
- movdqu 0x30(%edi), %xmm3
- movdqa %xmm3, 0x30(%edx)
- movdqu 0x20(%edi), %xmm2
- movdqa %xmm2, 0x20(%edx)
- movdqu 0x10(%edi), %xmm1
- movdqa %xmm1, 0x10(%edx)
- movdqu (%edi), %xmm0
- movdqa %xmm0, (%edx)
- cmp $64, %ecx
- jae L(bk_ssse3_cpy)
- jmp L(bk_write_64bytesless) /* fewer than 64 bytes left: finish with the small-size code */
-
-# endif
-
-END (MEMCPY)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S
deleted file mode 100644
index f725944620..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Multiple versions of memcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need memcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(memcpy)
- .type memcpy, @gnu_indirect_function /* memcpy is an IFUNC symbol: this code selects the implementation */
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__memcpy_ia32) /* default candidate */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* test gave zero (presumably no SSE2): keep __memcpy_ia32 */
- LOAD_FUNC_GOT_EAX (__memcpy_sse2_unaligned)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f /* test gave nonzero (presumably Fast_Unaligned_Load set): keep the sse2_unaligned version */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* zero (presumably no SSSE3): keep the sse2_unaligned version */
- LOAD_FUNC_GOT_EAX (__memcpy_ssse3)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f /* zero (presumably Fast_Rep_String clear): keep the ssse3 version */
- LOAD_FUNC_GOT_EAX (__memcpy_ssse3_rep)
-2: ret /* the last candidate loaded is the result (in EAX, going by the macro name) */
-END(memcpy)
-
-# undef ENTRY /* from here on ENTRY/END ignore their argument and emit the hidden symbol __memcpy_ia32 */
-# define ENTRY(name) \
- .type __memcpy_ia32, @function; \
- .p2align 4; \
- .globl __memcpy_ia32; \
- .hidden __memcpy_ia32; \
- __memcpy_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32
-
-# undef ENTRY_CHK /* likewise ENTRY_CHK/END_CHK always emit __memcpy_chk_ia32 */
-# define ENTRY_CHK(name) \
- .type __memcpy_chk_ia32, @function; \
- .globl __memcpy_chk_ia32; \
- .p2align 4; \
- __memcpy_chk_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
- cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32
-
-# undef libc_hidden_builtin_def /* replaced below: __GI_memcpy becomes a direct alias of __memcpy_ia32 */
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memcpy; __GI_memcpy = __memcpy_ia32
-#endif
-
-#include "../memcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S
deleted file mode 100644
index 1b4fbe2e6f..0000000000
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Multiple versions of __memcpy_chk
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. There are no multiarch memcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
- .text
-ENTRY(__memcpy_chk)
- .type __memcpy_chk, @gnu_indirect_function /* __memcpy_chk is an IFUNC symbol: this code selects the implementation */
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32) /* default candidate */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* test gave zero (presumably no SSE2): keep the ia32 version */
- LOAD_FUNC_GOT_EAX (__memcpy_chk_sse2_unaligned)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f /* nonzero (presumably Fast_Unaligned_Load set): keep the sse2_unaligned version */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* zero (presumably no SSSE3): keep the sse2_unaligned version */
- LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f /* zero (presumably Fast_Rep_String clear): keep the ssse3 version */
- LOAD_FUNC_GOT_EAX (__memcpy_chk_ssse3_rep)
-2: ret /* the last candidate loaded is the result */
-END(__memcpy_chk)
-# else /* not SHARED: no selector, use the parent directory's memcpy_chk.S */
-# include "../memcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
deleted file mode 100644
index 3873594cb2..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE /* presumably switches the included source to its memmove variant - the include is outside this view */
-#define MEMCPY __memmove_sse2_unaligned /* presumably the symbol name the included source emits */
-#define MEMCPY_CHK __memmove_chk_sse2_unaligned /* presumably the name of the matching _chk entry point */
-#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S
deleted file mode 100644
index d202fc4a13..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove-ssse3-rep.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE /* presumably switches the included source to its memmove variant - the include is outside this view */
-#define MEMCPY __memmove_ssse3_rep /* presumably the symbol name the included source emits */
-#define MEMCPY_CHK __memmove_chk_ssse3_rep /* presumably the name of the matching _chk entry point */
-#include "memcpy-ssse3-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove-ssse3.S b/sysdeps/i386/i686/multiarch/memmove-ssse3.S
deleted file mode 100644
index 295430b1ef..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMMOVE /* presumably switches the included source to its memmove variant - the include is outside this view */
-#define MEMCPY __memmove_ssse3 /* presumably the symbol name the included source emits */
-#define MEMCPY_CHK __memmove_chk_ssse3 /* presumably the name of the matching _chk entry point */
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S
deleted file mode 100644
index 6eb418ca7f..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Multiple versions of memmove
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
- .text
-ENTRY(memmove)
- .type memmove, @gnu_indirect_function /* memmove is an IFUNC symbol: this code selects the implementation */
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__memmove_ia32) /* default candidate */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* test gave zero (presumably no SSE2): keep __memmove_ia32 */
- LOAD_FUNC_GOT_EAX (__memmove_sse2_unaligned)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f /* nonzero (presumably Fast_Unaligned_Load set): keep the sse2_unaligned version */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* zero (presumably no SSSE3): keep the sse2_unaligned version */
- LOAD_FUNC_GOT_EAX (__memmove_ssse3)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f /* zero (presumably Fast_Rep_String clear): keep the ssse3 version */
- LOAD_FUNC_GOT_EAX (__memmove_ssse3_rep)
-2: ret /* the last candidate loaded is the result */
-END(memmove)
-
-# ifdef SHARED /* both ENTRY variants emit __memmove_ia32; only the SHARED one marks it .hidden */
-# undef ENTRY
-# define ENTRY(name) \
- .type __memmove_ia32, @function; \
- .p2align 4; \
- .globl __memmove_ia32; \
- .hidden __memmove_ia32; \
- __memmove_ia32: cfi_startproc; \
- CALL_MCOUNT
-# else
-# undef ENTRY
-# define ENTRY(name) \
- .type __memmove_ia32, @function; \
- .globl __memmove_ia32; \
- .p2align 4; \
- __memmove_ia32: cfi_startproc; \
- CALL_MCOUNT
-# endif
-
-# undef END /* END closes __memmove_ia32 regardless of its argument */
-# define END(name) \
- cfi_endproc; .size __memmove_ia32, .-__memmove_ia32
-
-# undef ENTRY_CHK /* ENTRY_CHK/END_CHK always emit __memmove_chk_ia32 */
-# define ENTRY_CHK(name) \
- .type __memmove_chk_ia32, @function; \
- .globl __memmove_chk_ia32; \
- .p2align 4; \
- __memmove_chk_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
- cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def /* replaced below: __GI_memmove becomes a direct alias of __memmove_ia32 */
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memmove; __GI_memmove = __memmove_ia32
-# endif
-#endif
-
-#include "../memmove.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S
deleted file mode 100644
index 314834c4c6..0000000000
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ /dev/null
@@ -1,94 +0,0 @@
-/* Multiple versions of __memmove_chk
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
- .text
-ENTRY(__memmove_chk)
- .type __memmove_chk, @gnu_indirect_function /* __memmove_chk is an IFUNC symbol: this code selects the implementation */
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__memmove_chk_ia32) /* default candidate */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* test gave zero (presumably no SSE2): keep the ia32 version */
- LOAD_FUNC_GOT_EAX (__memmove_chk_sse2_unaligned)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f /* nonzero (presumably Fast_Unaligned_Load set): keep the sse2_unaligned version */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* zero (presumably no SSSE3): keep the sse2_unaligned version */
- LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f /* zero (presumably Fast_Rep_String clear): keep the ssse3 version */
- LOAD_FUNC_GOT_EAX (__memmove_chk_ssse3_rep)
-2: ret /* the last candidate loaded is the result */
-END(__memmove_chk)
-
-# ifndef SHARED /* non-SHARED build: each _chk variant is a small stub defined here */
- .type __memmove_chk_sse2_unaligned, @function
- .p2align 4;
-__memmove_chk_sse2_unaligned:
- cfi_startproc
- CALL_MCOUNT
- movl 12(%esp), %eax /* EAX = stack argument at 12(%esp) (presumably the copy length) */
- cmpl %eax, 16(%esp) /* compare the argument at 16(%esp) (presumably the destination size) with it */
- jb __chk_fail /* unsigned: 16(%esp) below 12(%esp) */
- jmp __memmove_sse2_unaligned /* check passed: tail-jump to the unchecked routine */
- cfi_endproc
- .size __memmove_chk_sse2_unaligned, .-__memmove_chk_sse2_unaligned
-
- .type __memmove_chk_ssse3, @function
- .p2align 4;
-__memmove_chk_ssse3:
- cfi_startproc
- CALL_MCOUNT
- movl 12(%esp), %eax /* same check as in the stub above */
- cmpl %eax, 16(%esp)
- jb __chk_fail
- jmp __memmove_ssse3
- cfi_endproc
- .size __memmove_chk_ssse3, .-__memmove_chk_ssse3
-
- .type __memmove_chk_ssse3_rep, @function
- .p2align 4;
-__memmove_chk_ssse3_rep:
- cfi_startproc
- CALL_MCOUNT
- movl 12(%esp), %eax /* same check as in the stubs above */
- cmpl %eax, 16(%esp)
- jb __chk_fail
- jmp __memmove_ssse3_rep
- cfi_endproc
- .size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep
-
- .type __memmove_chk_ia32, @function
- .p2align 4;
-__memmove_chk_ia32:
- cfi_startproc
- CALL_MCOUNT
- movl 12(%esp), %eax /* same check as in the stubs above */
- cmpl %eax, 16(%esp)
- jb __chk_fail
- jmp __memmove_ia32
- cfi_endproc
- .size __memmove_chk_ia32, .-__memmove_chk_ia32
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
deleted file mode 100644
index a1cea50771..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY /* presumably switches the included source to its mempcpy variant - the include is outside this view */
-#define MEMCPY __mempcpy_sse2_unaligned /* presumably the symbol name the included source emits */
-#define MEMCPY_CHK __mempcpy_chk_sse2_unaligned /* presumably the name of the matching _chk entry point */
-#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S
deleted file mode 100644
index 5357b33e18..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3-rep.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY /* presumably switches the included source to its mempcpy variant - the include is outside this view */
-#define MEMCPY __mempcpy_ssse3_rep /* presumably the symbol name the included source emits */
-#define MEMCPY_CHK __mempcpy_chk_ssse3_rep /* presumably the name of the matching _chk entry point */
-#include "memcpy-ssse3-rep.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S b/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S
deleted file mode 100644
index 822d98e954..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY /* presumably switches the included source to its mempcpy variant - the include is outside this view */
-#define MEMCPY __mempcpy_ssse3 /* presumably the symbol name the included source emits */
-#define MEMCPY_CHK __mempcpy_chk_ssse3 /* presumably the name of the matching _chk entry point */
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S
deleted file mode 100644
index 06e377fbc9..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ /dev/null
@@ -1,81 +0,0 @@
-/* Multiple versions of mempcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need mempcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__mempcpy)
- .type __mempcpy, @gnu_indirect_function /* __mempcpy is an IFUNC symbol: this code selects the implementation */
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__mempcpy_ia32) /* default candidate */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* test gave zero (presumably no SSE2): keep __mempcpy_ia32 */
- LOAD_FUNC_GOT_EAX (__mempcpy_sse2_unaligned)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f /* nonzero (presumably Fast_Unaligned_Load set): keep the sse2_unaligned version */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* zero (presumably no SSSE3): keep the sse2_unaligned version */
- LOAD_FUNC_GOT_EAX (__mempcpy_ssse3)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f /* zero (presumably Fast_Rep_String clear): keep the ssse3 version */
- LOAD_FUNC_GOT_EAX (__mempcpy_ssse3_rep)
-2: ret /* the last candidate loaded is the result */
-END(__mempcpy)
-
-# undef ENTRY /* from here on ENTRY/END ignore their argument and emit the hidden symbol __mempcpy_ia32 */
-# define ENTRY(name) \
- .type __mempcpy_ia32, @function; \
- .p2align 4; \
- .globl __mempcpy_ia32; \
- .hidden __mempcpy_ia32; \
- __mempcpy_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32
-
-# undef ENTRY_CHK /* likewise ENTRY_CHK/END_CHK always emit __mempcpy_chk_ia32 */
-# define ENTRY_CHK(name) \
- .type __mempcpy_chk_ia32, @function; \
- .globl __mempcpy_chk_ia32; \
- .p2align 4; \
- __mempcpy_chk_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
- cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32
-
-# undef libc_hidden_def /* both hidden-def macros are replaced with direct aliases of __mempcpy_ia32 */
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_def(name) \
- .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32
-# define libc_hidden_builtin_def(name) \
- .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32
-#endif
-
-#include "../mempcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
deleted file mode 100644
index e13e5248a5..0000000000
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Multiple versions of __mempcpy_chk
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. There are no multiarch mempcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
- .text
-ENTRY(__mempcpy_chk)
- .type __mempcpy_chk, @gnu_indirect_function /* __mempcpy_chk is an IFUNC symbol: this code selects the implementation */
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32) /* default candidate */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* test gave zero (presumably no SSE2): keep the ia32 version */
- LOAD_FUNC_GOT_EAX (__mempcpy_chk_sse2_unaligned)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f /* nonzero (presumably Fast_Unaligned_Load set): keep the sse2_unaligned version */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* zero (presumably no SSSE3): keep the sse2_unaligned version */
- LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f /* zero (presumably Fast_Rep_String clear): keep the ssse3 version */
- LOAD_FUNC_GOT_EAX (__mempcpy_chk_ssse3_rep)
-2: ret /* the last candidate loaded is the result */
-END(__mempcpy_chk)
-# else /* not SHARED: no selector, use the parent directory's mempcpy_chk.S */
-# include "../mempcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c
deleted file mode 100644
index ef7bbbe792..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr-c.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#if IS_IN (libc)
-# define MEMRCHR __memrchr_ia32 /* presumably the function name used by the included string/memrchr.c - confirm there */
-# include <string.h>
-extern void *__memrchr_ia32 (const void *, int, size_t); /* prototype for the renamed function */
-#endif
-
-#include "string/memrchr.c"
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
deleted file mode 100644
index dbbe94fd08..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
+++ /dev/null
@@ -1,417 +0,0 @@
-/* Optimized memrchr with sse2
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-/* CFI_PUSH / CFI_POP emit only unwind annotations; PUSH / POP pair them with the real pushl / popl. */
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-/* Stack offsets of the three arguments at function entry: 4, 8 and 12 bytes above ESP. */
-# define PARMS 4
-# define STR1 PARMS
-# define STR2 STR1+4
-# define LEN STR2+4
-
-# define MEMCHR __memrchr_sse2_bsf /* symbol name used by ENTRY/END below */
-
- .text
-ENTRY (MEMCHR)
- mov STR1(%esp), %ecx /* ECX = first argument (buffer start) */
- movd STR2(%esp), %xmm1 /* low dword of XMM1 = second argument (byte to search for) */
- mov LEN(%esp), %edx /* EDX = third argument (length) */
-
- sub $16, %edx
- jbe L(length_less16) /* length <= 16 is handled separately */
-
- punpcklbw %xmm1, %xmm1
- add %edx, %ecx /* ECX = start + length - 16, i.e. the last 16 bytes */
- punpcklbw %xmm1, %xmm1
-
- movdqu (%ecx), %xmm0 /* unaligned load of the last 16 bytes */
- pshufd $0, %xmm1, %xmm1 /* with the two punpcklbw above: search byte replicated into all 16 byte lanes */
- pcmpeqb %xmm1, %xmm0
-
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches0)
-
- sub $64, %ecx
- mov %ecx, %eax
- and $15, %eax
- jz L(loop_prolog)
-
- add $16, %ecx /* ECX is rounded up to the next 16-byte boundary ... */
- add $16, %edx
- sub %eax, %ecx
- sub %eax, %edx /* ... and EDX is moved by the same amount */
-
- .p2align 4
-/* Loop start on aligned string. */
-L(loop_prolog):
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa 48(%ecx), %xmm0 /* check the four 16-byte vectors of this 64-byte window, highest first */
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%ecx), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa (%ecx), %xmm4
- pcmpeqb %xmm1, %xmm4
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
- sub $64, %ecx /* step down to the next 64-byte window and repeat the four checks */
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa 48(%ecx), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%ecx), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa (%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches0)
-
- mov %ecx, %eax
- and $63, %eax
- test %eax, %eax
- jz L(align64_loop)
-
- add $64, %ecx /* round ECX up to the next 64-byte boundary, moving EDX by the same amount */
- add $64, %edx
- sub %eax, %ecx
- sub %eax, %edx
-
- .p2align 4
-L(align64_loop): /* main loop: one 64-byte-aligned window per iteration */
- sub $64, %ecx
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa (%ecx), %xmm0
- movdqa 16(%ecx), %xmm2
- movdqa 32(%ecx), %xmm3
- movdqa 48(%ecx), %xmm4
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm3, %xmm0 /* merge the four compare results; XMM0 and XMM2 are overwritten, XMM3 and XMM4 are not */
- pmaxub %xmm4, %xmm2
- pmaxub %xmm0, %xmm2
- pmovmskb %xmm2, %eax
-
- test %eax, %eax
- jz L(align64_loop) /* no match anywhere in this window */
-
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches48)
-
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm2 /* the results for offsets 16 and 0 were overwritten by pmaxub, so recompute them */
-
- pcmpeqb %xmm1, %xmm2
- pcmpeqb (%ecx), %xmm1
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- pmovmskb %xmm1, %eax /* the merged mask was nonzero and the other three were zero, so this one is nonzero */
- bsr %eax, %eax /* index of the highest set bit = last matching byte in the vector */
-
- add %ecx, %eax
- ret
-
- .p2align 4
-L(exit_loop): /* final window: after the add below, EDX appears to be the number of unsearched bytes, lying at the top of the 64 bytes at ECX - confirm */
- add $64, %edx
- cmp $32, %edx
- jbe L(exit_loop_32)
-
- movdqa 48(%ecx), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%ecx), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16_1) /* the _1 variants range-check the match position against EDX before returning */
- cmp $48, %edx
- jbe L(return_null)
-
- pcmpeqb (%ecx), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches0_1)
- xor %eax, %eax /* no match: return NULL */
- ret
-
- .p2align 4
-L(exit_loop_32): /* EDX <= 32 here: only the vectors at offsets 48 and 32 are examined */
- movdqa 48(%ecx), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48_1)
- cmp $16, %edx
- jbe L(return_null)
-
- pcmpeqb 32(%ecx), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches32_1)
- xor %eax, %eax /* no match: return NULL */
- ret
-
- .p2align 4
-L(matches0): /* EAX = match mask for the vector at ECX; return the address of its highest matching byte */
- bsr %eax, %eax
- add %ecx, %eax
- ret
-
- .p2align 4
-L(matches16): /* same, for the vector at ECX + 16 */
- bsr %eax, %eax
- lea 16(%eax, %ecx), %eax
- ret
-
- .p2align 4
-L(matches32): /* same, for the vector at ECX + 32 */
- bsr %eax, %eax
- lea 32(%eax, %ecx), %eax
- ret
-
- .p2align 4
-L(matches48): /* same, for the vector at ECX + 48 */
- bsr %eax, %eax
- lea 48(%eax, %ecx), %eax
- ret
-
- .p2align 4
-L(matches0_1): /* range-checked variant: returns NULL when EDX - 64 + bit index is negative */
- bsr %eax, %eax
- sub $64, %edx
- add %eax, %edx
- jl L(return_null)
- add %ecx, %eax
- ret
-
- .p2align 4
-L(matches16_1): /* range-checked variant for offset 16: NULL when EDX - 48 + bit index is negative */
- bsr %eax, %eax
- sub $48, %edx
- add %eax, %edx
- jl L(return_null)
- lea 16(%ecx, %eax), %eax
- ret
-
- .p2align 4
-L(matches32_1): /* range-checked variant for offset 32: NULL when EDX - 32 + bit index is negative */
- bsr %eax, %eax
- sub $32, %edx
- add %eax, %edx
- jl L(return_null)
- lea 32(%ecx, %eax), %eax
- ret
-
- .p2align 4
-L(matches48_1): /* range-checked variant for offset 48: NULL when EDX - 16 + bit index is negative */
- bsr %eax, %eax
- sub $16, %edx
- add %eax, %edx
- jl L(return_null)
- lea 48(%ecx, %eax), %eax
- ret
-
- .p2align 4
-L(return_null): /* return 0 (NULL); used on paths where EDI has not been pushed */
- xor %eax, %eax
- ret
-
- .p2align 4
-L(length_less16_offset0): /* length 1..16 and buffer start (EAX) 16-byte aligned; DL = length */
- mov %dl, %cl
- pcmpeqb (%eax), %xmm1
-
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx /* EDX = (1 << length) - 1: mask of the bytes inside the buffer */
- mov %edx, %ecx
-
- pmovmskb %xmm1, %edx
-
- and %ecx, %edx /* drop matches beyond the end of the buffer */
- test %edx, %edx
- jz L(return_null)
-
- bsr %edx, %ecx /* highest remaining bit = last match */
- add %ecx, %eax
- ret
-
- .p2align 4
-L(length_less16): /* reached from the entry when length <= 16; EDX = length - 16, ECX = buffer start */
- punpcklbw %xmm1, %xmm1
- mov %ecx, %eax /* EAX = buffer start */
- punpcklbw %xmm1, %xmm1
- add $16, %edx /* EDX = length again */
- jz L(return_null) /* zero length: nothing to search */
-
- pshufd $0, %xmm1, %xmm1 /* with the two punpcklbw above: search byte replicated into all 16 byte lanes */
- and $15, %ecx /* ECX = offset of the buffer start within its 16-byte block */
- jz L(length_less16_offset0)
-
- PUSH (%edi) /* EDI is used as scratch below */
- mov %cl, %dh
- add %dl, %dh /* DH = offset + length */
- and $-16, %eax /* EAX = enclosing 16-byte-aligned address */
-
- sub $16, %dh
- ja L(length_less16_part2) /* the buffer extends into the next 16-byte block */
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edi
-
- sar %cl, %edi /* shift out the bytes that precede the buffer start */
- add %ecx, %eax /* EAX = buffer start again */
- mov %dl, %cl
-
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx /* EDX = (1 << length) - 1 */
-
- and %edx, %edi /* drop matches beyond the end of the buffer */
- test %edi, %edi
- jz L(ret_null)
-
- bsr %edi, %edi
- add %edi, %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(length_less16_part2): /* the buffer spans two 16-byte blocks; DH = number of its bytes in the upper block, CL = start offset */
- movdqa 16(%eax), %xmm2 /* search the upper block first, since the last match is wanted */
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %edi
-
- mov %cl, %ch /* keep the start offset in CH while CL is used as a shift count */
-
- mov %dh, %cl
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx /* EDX = (1 << DH) - 1: bytes of the upper block inside the buffer */
-
- and %edx, %edi
-
- test %edi, %edi
- jnz L(length_less16_part2_return)
-
- pcmpeqb (%eax), %xmm1 /* no match above: search the lower block */
- pmovmskb %xmm1, %edi
-
- mov %ch, %cl
- sar %cl, %edi /* shift out the bytes that precede the buffer start */
- test %edi, %edi
- jz L(ret_null)
-
- bsr %edi, %edi
- add %edi, %eax
- xor %ch, %ch /* clear CH so that ECX is just the start offset */
- add %ecx, %eax /* EAX = aligned base + match index + start offset */
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(length_less16_part2_return): /* match found in the upper block: result = EAX + 16 + index of the highest set bit */
- bsr %edi, %edi
- lea 16(%eax, %edi), %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(ret_null): /* return 0 (NULL) on the paths that pushed EDI */
- xor %eax, %eax
- POP (%edi)
- ret
-
-END (MEMCHR)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
deleted file mode 100644
index 5f7853f683..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S
+++ /dev/null
@@ -1,724 +0,0 @@
-/* Optimized memrchr with sse2 without bsf
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 4
-# define STR1 PARMS
-# define STR2 STR1+4
-# define LEN STR2+4
-
- atom_text_section
-ENTRY (__memrchr_sse2)
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
- mov LEN(%esp), %edx
-
- sub $16, %edx
- jbe L(length_less16)
-
- punpcklbw %xmm1, %xmm1
- add %edx, %ecx
- punpcklbw %xmm1, %xmm1
-
- movdqu (%ecx), %xmm0
- pshufd $0, %xmm1, %xmm1
- pcmpeqb %xmm1, %xmm0
-
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(exit_dispatch)
-
- sub $64, %ecx
- mov %ecx, %eax
- and $15, %eax
- jz L(loop_prolog)
-
- lea 16(%ecx), %ecx
- lea 16(%edx), %edx
- sub %eax, %edx
- and $-16, %ecx
-
- .p2align 4
-/* Loop start on aligned string. */
-L(loop_prolog):
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa 48(%ecx), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%ecx), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa (%ecx), %xmm4
- pcmpeqb %xmm1, %xmm4
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(exit_dispatch)
-
- sub $64, %ecx
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa 48(%ecx), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%ecx), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa (%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(exit_dispatch)
-
- mov %ecx, %eax
- and $63, %eax
- test %eax, %eax
- jz L(align64_loop)
-
- lea 64(%ecx), %ecx
- lea 64(%edx), %edx
- and $-64, %ecx
- sub %eax, %edx
-
- .p2align 4
-L(align64_loop):
- sub $64, %ecx
- sub $64, %edx
- jbe L(exit_loop)
-
- movdqa (%ecx), %xmm0
- movdqa 16(%ecx), %xmm2
- movdqa 32(%ecx), %xmm3
- movdqa 48(%ecx), %xmm4
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm3, %xmm0
- pmaxub %xmm4, %xmm2
- pmaxub %xmm0, %xmm2
- pmovmskb %xmm2, %eax
-
- test %eax, %eax
- jz L(align64_loop)
-
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches48)
-
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm2
-
- pcmpeqb %xmm1, %xmm2
- pcmpeqb (%ecx), %xmm1
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- pmovmskb %xmm1, %eax
- test %ah, %ah
- jnz L(exit_dispatch_high)
- mov %al, %dl
- and $15 << 4, %dl
- jnz L(exit_dispatch_8)
- test $0x08, %al
- jnz L(exit_4)
- test $0x04, %al
- jnz L(exit_3)
- test $0x02, %al
- jnz L(exit_2)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(exit_loop):
- add $64, %edx
- cmp $32, %edx
- jbe L(exit_loop_32)
-
- movdqa 48(%ecx), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%ecx), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%ecx), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16_1)
- cmp $48, %edx
- jbe L(return_null)
-
- pcmpeqb (%ecx), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches0_1)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(exit_loop_32):
- movdqa 48(%ecx), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48_1)
- cmp $16, %edx
- jbe L(return_null)
-
- pcmpeqb 32(%ecx), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches32_1)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(matches16):
- lea 16(%ecx), %ecx
- test %ah, %ah
- jnz L(exit_dispatch_high)
- mov %al, %dl
- and $15 << 4, %dl
- jnz L(exit_dispatch_8)
- test $0x08, %al
- jnz L(exit_4)
- test $0x04, %al
- jnz L(exit_3)
- test $0x02, %al
- jnz L(exit_2)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(matches32):
- lea 32(%ecx), %ecx
- test %ah, %ah
- jnz L(exit_dispatch_high)
- mov %al, %dl
- and $15 << 4, %dl
- jnz L(exit_dispatch_8)
- test $0x08, %al
- jnz L(exit_4)
- test $0x04, %al
- jnz L(exit_3)
- test $0x02, %al
- jnz L(exit_2)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(matches48):
- lea 48(%ecx), %ecx
-
- .p2align 4
-L(exit_dispatch):
- test %ah, %ah
- jnz L(exit_dispatch_high)
- mov %al, %dl
- and $15 << 4, %dl
- jnz L(exit_dispatch_8)
- test $0x08, %al
- jnz L(exit_4)
- test $0x04, %al
- jnz L(exit_3)
- test $0x02, %al
- jnz L(exit_2)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(exit_dispatch_8):
- test $0x80, %al
- jnz L(exit_8)
- test $0x40, %al
- jnz L(exit_7)
- test $0x20, %al
- jnz L(exit_6)
- lea 4(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_dispatch_high):
- mov %ah, %dh
- and $15 << 4, %dh
- jnz L(exit_dispatch_high_8)
- test $0x08, %ah
- jnz L(exit_12)
- test $0x04, %ah
- jnz L(exit_11)
- test $0x02, %ah
- jnz L(exit_10)
- lea 8(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_dispatch_high_8):
- test $0x80, %ah
- jnz L(exit_16)
- test $0x40, %ah
- jnz L(exit_15)
- test $0x20, %ah
- jnz L(exit_14)
- lea 12(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_2):
- lea 1(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_3):
- lea 2(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_4):
- lea 3(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_6):
- lea 5(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_7):
- lea 6(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_8):
- lea 7(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_10):
- lea 9(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_11):
- lea 10(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_12):
- lea 11(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_14):
- lea 13(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_15):
- lea 14(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_16):
- lea 15(%ecx), %eax
- ret
-
- .p2align 4
-L(matches0_1):
- lea -64(%edx), %edx
-
- test %ah, %ah
- jnz L(exit_dispatch_1_high)
- mov %al, %ah
- and $15 << 4, %ah
- jnz L(exit_dispatch_1_8)
- test $0x08, %al
- jnz L(exit_1_4)
- test $0x04, %al
- jnz L(exit_1_3)
- test $0x02, %al
- jnz L(exit_1_2)
- add $0, %edx
- jl L(return_null)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(matches16_1):
- lea -48(%edx), %edx
- lea 16(%ecx), %ecx
-
- test %ah, %ah
- jnz L(exit_dispatch_1_high)
- mov %al, %ah
- and $15 << 4, %ah
- jnz L(exit_dispatch_1_8)
- test $0x08, %al
- jnz L(exit_1_4)
- test $0x04, %al
- jnz L(exit_1_3)
- test $0x02, %al
- jnz L(exit_1_2)
- add $0, %edx
- jl L(return_null)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(matches32_1):
- lea -32(%edx), %edx
- lea 32(%ecx), %ecx
-
- test %ah, %ah
- jnz L(exit_dispatch_1_high)
- mov %al, %ah
- and $15 << 4, %ah
- jnz L(exit_dispatch_1_8)
- test $0x08, %al
- jnz L(exit_1_4)
- test $0x04, %al
- jnz L(exit_1_3)
- test $0x02, %al
- jnz L(exit_1_2)
- add $0, %edx
- jl L(return_null)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(matches48_1):
- lea -16(%edx), %edx
- lea 48(%ecx), %ecx
-
- .p2align 4
-L(exit_dispatch_1):
- test %ah, %ah
- jnz L(exit_dispatch_1_high)
- mov %al, %ah
- and $15 << 4, %ah
- jnz L(exit_dispatch_1_8)
- test $0x08, %al
- jnz L(exit_1_4)
- test $0x04, %al
- jnz L(exit_1_3)
- test $0x02, %al
- jnz L(exit_1_2)
- add $0, %edx
- jl L(return_null)
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(exit_dispatch_1_8):
- test $0x80, %al
- jnz L(exit_1_8)
- test $0x40, %al
- jnz L(exit_1_7)
- test $0x20, %al
- jnz L(exit_1_6)
- add $4, %edx
- jl L(return_null)
- lea 4(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_dispatch_1_high):
- mov %ah, %al
- and $15 << 4, %al
- jnz L(exit_dispatch_1_high_8)
- test $0x08, %ah
- jnz L(exit_1_12)
- test $0x04, %ah
- jnz L(exit_1_11)
- test $0x02, %ah
- jnz L(exit_1_10)
- add $8, %edx
- jl L(return_null)
- lea 8(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_dispatch_1_high_8):
- test $0x80, %ah
- jnz L(exit_1_16)
- test $0x40, %ah
- jnz L(exit_1_15)
- test $0x20, %ah
- jnz L(exit_1_14)
- add $12, %edx
- jl L(return_null)
- lea 12(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_2):
- add $1, %edx
- jl L(return_null)
- lea 1(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_3):
- add $2, %edx
- jl L(return_null)
- lea 2(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_4):
- add $3, %edx
- jl L(return_null)
- lea 3(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_6):
- add $5, %edx
- jl L(return_null)
- lea 5(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_7):
- add $6, %edx
- jl L(return_null)
- lea 6(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_8):
- add $7, %edx
- jl L(return_null)
- lea 7(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_10):
- add $9, %edx
- jl L(return_null)
- lea 9(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_11):
- add $10, %edx
- jl L(return_null)
- lea 10(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_12):
- add $11, %edx
- jl L(return_null)
- lea 11(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_14):
- add $13, %edx
- jl L(return_null)
- lea 13(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_15):
- add $14, %edx
- jl L(return_null)
- lea 14(%ecx), %eax
- ret
-
- .p2align 4
-L(exit_1_16):
- add $15, %edx
- jl L(return_null)
- lea 15(%ecx), %eax
- ret
-
- .p2align 4
-L(return_null):
- xor %eax, %eax
- ret
-
- .p2align 4
-L(length_less16_offset0):
- mov %dl, %cl
- pcmpeqb (%eax), %xmm1
-
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx
-
- mov %eax, %ecx
- pmovmskb %xmm1, %eax
-
- and %edx, %eax
- test %eax, %eax
- jnz L(exit_dispatch)
-
- xor %eax, %eax
- ret
-
- .p2align 4
-L(length_less16):
- punpcklbw %xmm1, %xmm1
- add $16, %edx
- je L(return_null)
- punpcklbw %xmm1, %xmm1
-
- mov %ecx, %eax
- pshufd $0, %xmm1, %xmm1
-
- and $15, %ecx
- jz L(length_less16_offset0)
-
- PUSH (%edi)
-
- mov %cl, %dh
- add %dl, %dh
- and $-16, %eax
-
- sub $16, %dh
- ja L(length_less16_part2)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edi
-
- sar %cl, %edi
- add %ecx, %eax
- mov %dl, %cl
-
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx
-
- and %edx, %edi
- test %edi, %edi
- jz L(ret_null)
-
- bsr %edi, %edi
- add %edi, %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(length_less16_part2):
- movdqa 16(%eax), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %edi
-
- mov %cl, %ch
-
- mov %dh, %cl
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx
-
- and %edx, %edi
-
- test %edi, %edi
- jnz L(length_less16_part2_return)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edi
-
- mov %ch, %cl
- sar %cl, %edi
- test %edi, %edi
- jz L(ret_null)
-
- bsr %edi, %edi
- add %edi, %eax
- xor %ch, %ch
- add %ecx, %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(length_less16_part2_return):
- bsr %edi, %edi
- lea 16(%eax, %edi), %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(ret_null):
- xor %eax, %eax
- POP (%edi)
- ret
-
-END (__memrchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memrchr.S b/sysdeps/i386/i686/multiarch/memrchr.S
deleted file mode 100644
index d4253a553b..0000000000
--- a/sysdeps/i386/i686/multiarch/memrchr.S
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Multiple versions of memrchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(__memrchr)
- .type __memrchr, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- HAS_ARCH_FEATURE (Slow_BSF)
- jz 3f
-
- LOAD_FUNC_GOT_EAX (__memrchr_sse2)
- ret
-
-2: LOAD_FUNC_GOT_EAX (__memrchr_ia32)
- ret
-
-3: LOAD_FUNC_GOT_EAX (__memrchr_sse2_bsf)
- ret
-END(__memrchr)
-
-weak_alias(__memrchr, memrchr)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
deleted file mode 100644
index 3221077e49..0000000000
--- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ /dev/null
@@ -1,811 +0,0 @@
-/* memset with SSE2 and REP string.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_BZERO
-# define DEST PARMS
-# define LEN DEST+4
-# define SETRTNVAL
-#else
-# define DEST PARMS
-# define CHR DEST+4
-# define LEN CHR+4
-# define SETRTNVAL movl DEST(%esp), %eax
-#endif
-
-#ifdef SHARED
-# define ENTRANCE PUSH (%ebx);
-# define RETURN_END POP (%ebx); ret
-# define RETURN RETURN_END; CFI_PUSH (%ebx)
-# define PARMS 8 /* Preserve EBX. */
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
- /* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
- /* Get the address of the jump table. */ \
- add $(TABLE - .), %ebx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- add (%ebx,%ecx,4), %ebx; \
- add %ecx, %edx; \
- /* We loaded the jump table and adjusted EDX. Go. */ \
- jmp *%ebx
-#else
-# define ENTRANCE
-# define RETURN_END ret
-# define RETURN RETURN_END
-# define PARMS 4
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute offsets. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
- add %ecx, %edx; \
- jmp *TABLE(,%ecx,4)
-#endif
-
- .section .text.sse2,"ax",@progbits
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
-ENTRY (__memset_chk_sse2_rep)
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memset_chk_sse2_rep)
-#endif
-ENTRY (__memset_sse2_rep)
- ENTRANCE
-
- movl LEN(%esp), %ecx
-#ifdef USE_AS_BZERO
- xor %eax, %eax
-#else
- movzbl CHR(%esp), %eax
- movb %al, %ah
- /* Fill the whole EAX with pattern. */
- movl %eax, %edx
- shl $16, %eax
- or %edx, %eax
-#endif
- movl DEST(%esp), %edx
- cmp $32, %ecx
- jae L(32bytesormore)
-
-L(write_less32bytes):
- BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
-
-
- .pushsection .rodata.sse2,"a",@progbits
- ALIGN (2)
-L(table_less_32bytes):
- .int JMPTBL (L(write_0bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_1bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_2bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_3bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_4bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_5bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_6bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_7bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_8bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_9bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_10bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_11bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_12bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_13bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_14bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_15bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_16bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_17bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_18bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_19bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_20bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_21bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_22bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_23bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_24bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_25bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_26bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_27bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_28bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_29bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_30bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_31bytes), L(table_less_32bytes))
- .popsection
-
- ALIGN (4)
-L(write_28bytes):
- movl %eax, -28(%edx)
-L(write_24bytes):
- movl %eax, -24(%edx)
-L(write_20bytes):
- movl %eax, -20(%edx)
-L(write_16bytes):
- movl %eax, -16(%edx)
-L(write_12bytes):
- movl %eax, -12(%edx)
-L(write_8bytes):
- movl %eax, -8(%edx)
-L(write_4bytes):
- movl %eax, -4(%edx)
-L(write_0bytes):
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(write_29bytes):
- movl %eax, -29(%edx)
-L(write_25bytes):
- movl %eax, -25(%edx)
-L(write_21bytes):
- movl %eax, -21(%edx)
-L(write_17bytes):
- movl %eax, -17(%edx)
-L(write_13bytes):
- movl %eax, -13(%edx)
-L(write_9bytes):
- movl %eax, -9(%edx)
-L(write_5bytes):
- movl %eax, -5(%edx)
-L(write_1bytes):
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(write_30bytes):
- movl %eax, -30(%edx)
-L(write_26bytes):
- movl %eax, -26(%edx)
-L(write_22bytes):
- movl %eax, -22(%edx)
-L(write_18bytes):
- movl %eax, -18(%edx)
-L(write_14bytes):
- movl %eax, -14(%edx)
-L(write_10bytes):
- movl %eax, -10(%edx)
-L(write_6bytes):
- movl %eax, -6(%edx)
-L(write_2bytes):
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(write_31bytes):
- movl %eax, -31(%edx)
-L(write_27bytes):
- movl %eax, -27(%edx)
-L(write_23bytes):
- movl %eax, -23(%edx)
-L(write_19bytes):
- movl %eax, -19(%edx)
-L(write_15bytes):
- movl %eax, -15(%edx)
-L(write_11bytes):
- movl %eax, -11(%edx)
-L(write_7bytes):
- movl %eax, -7(%edx)
-L(write_3bytes):
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-/* ECX > 32 and EDX is 4 byte aligned. */
-L(32bytesormore):
- /* Fill xmm0 with the pattern. */
-#ifdef USE_AS_BZERO
- pxor %xmm0, %xmm0
-#else
- movd %eax, %xmm0
- pshufd $0, %xmm0, %xmm0
-#endif
- testl $0xf, %edx
- jz L(aligned_16)
-/* ECX > 32 and EDX is not 16 byte aligned. */
-L(not_aligned_16):
- movdqu %xmm0, (%edx)
- movl %edx, %eax
- and $-16, %edx
- add $16, %edx
- sub %edx, %eax
- add %eax, %ecx
- movd %xmm0, %eax
-
- ALIGN (4)
-L(aligned_16):
- cmp $128, %ecx
- jae L(128bytesormore)
-
-L(aligned_16_less128bytes):
- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
- ALIGN (4)
-L(128bytesormore):
- PUSH (%edi)
-#ifdef DATA_CACHE_SIZE
- PUSH (%ebx)
- mov $DATA_CACHE_SIZE, %ebx
-#else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- mov __x86_data_cache_size@GOTOFF(%ebx), %ebx
-# else
- PUSH (%ebx)
- mov __x86_data_cache_size, %ebx
-# endif
-#endif
- mov %ebx, %edi
- shr $4, %ebx
- sub %ebx, %edi
-#if defined DATA_CACHE_SIZE || !defined SHARED
- POP (%ebx)
-#endif
-/*
- * When data size approximate the end of L1 cache,
- * fast string will prefetch and combine data efficiently.
- */
- cmp %edi, %ecx
- jae L(128bytesormore_endof_L1)
- subl $128, %ecx
-L(128bytesormore_normal):
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- lea 128(%edx), %edx
- jb L(128bytesless_normal)
-
-
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- lea 128(%edx), %edx
- jae L(128bytesormore_normal)
-
-L(128bytesless_normal):
- POP (%edi)
- add $128, %ecx
- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
- CFI_PUSH (%edi)
- ALIGN (4)
-L(128bytesormore_endof_L1):
- mov %edx, %edi
- mov %ecx, %edx
- shr $2, %ecx
- and $3, %edx
- rep stosl
- jz L(copy_page_by_rep_exit)
- cmp $2, %edx
- jb L(copy_page_by_rep_left_1)
- movw %ax, (%edi)
- add $2, %edi
- sub $2, %edx
- jz L(copy_page_by_rep_exit)
-L(copy_page_by_rep_left_1):
- movb %al, (%edi)
-L(copy_page_by_rep_exit):
- POP (%edi)
- SETRTNVAL
- RETURN
-
- .pushsection .rodata.sse2,"a",@progbits
- ALIGN (2)
-L(table_16_128bytes):
- .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
- .popsection
-
- ALIGN (4)
-L(aligned_16_112bytes):
- movdqa %xmm0, -112(%edx)
-L(aligned_16_96bytes):
- movdqa %xmm0, -96(%edx)
-L(aligned_16_80bytes):
- movdqa %xmm0, -80(%edx)
-L(aligned_16_64bytes):
- movdqa %xmm0, -64(%edx)
-L(aligned_16_48bytes):
- movdqa %xmm0, -48(%edx)
-L(aligned_16_32bytes):
- movdqa %xmm0, -32(%edx)
-L(aligned_16_16bytes):
- movdqa %xmm0, -16(%edx)
-L(aligned_16_0bytes):
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_113bytes):
- movdqa %xmm0, -113(%edx)
-L(aligned_16_97bytes):
- movdqa %xmm0, -97(%edx)
-L(aligned_16_81bytes):
- movdqa %xmm0, -81(%edx)
-L(aligned_16_65bytes):
- movdqa %xmm0, -65(%edx)
-L(aligned_16_49bytes):
- movdqa %xmm0, -49(%edx)
-L(aligned_16_33bytes):
- movdqa %xmm0, -33(%edx)
-L(aligned_16_17bytes):
- movdqa %xmm0, -17(%edx)
-L(aligned_16_1bytes):
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_114bytes):
- movdqa %xmm0, -114(%edx)
-L(aligned_16_98bytes):
- movdqa %xmm0, -98(%edx)
-L(aligned_16_82bytes):
- movdqa %xmm0, -82(%edx)
-L(aligned_16_66bytes):
- movdqa %xmm0, -66(%edx)
-L(aligned_16_50bytes):
- movdqa %xmm0, -50(%edx)
-L(aligned_16_34bytes):
- movdqa %xmm0, -34(%edx)
-L(aligned_16_18bytes):
- movdqa %xmm0, -18(%edx)
-L(aligned_16_2bytes):
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_115bytes):
- movdqa %xmm0, -115(%edx)
-L(aligned_16_99bytes):
- movdqa %xmm0, -99(%edx)
-L(aligned_16_83bytes):
- movdqa %xmm0, -83(%edx)
-L(aligned_16_67bytes):
- movdqa %xmm0, -67(%edx)
-L(aligned_16_51bytes):
- movdqa %xmm0, -51(%edx)
-L(aligned_16_35bytes):
- movdqa %xmm0, -35(%edx)
-L(aligned_16_19bytes):
- movdqa %xmm0, -19(%edx)
-L(aligned_16_3bytes):
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_116bytes):
- movdqa %xmm0, -116(%edx)
-L(aligned_16_100bytes):
- movdqa %xmm0, -100(%edx)
-L(aligned_16_84bytes):
- movdqa %xmm0, -84(%edx)
-L(aligned_16_68bytes):
- movdqa %xmm0, -68(%edx)
-L(aligned_16_52bytes):
- movdqa %xmm0, -52(%edx)
-L(aligned_16_36bytes):
- movdqa %xmm0, -36(%edx)
-L(aligned_16_20bytes):
- movdqa %xmm0, -20(%edx)
-L(aligned_16_4bytes):
- movl %eax, -4(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_117bytes):
- movdqa %xmm0, -117(%edx)
-L(aligned_16_101bytes):
- movdqa %xmm0, -101(%edx)
-L(aligned_16_85bytes):
- movdqa %xmm0, -85(%edx)
-L(aligned_16_69bytes):
- movdqa %xmm0, -69(%edx)
-L(aligned_16_53bytes):
- movdqa %xmm0, -53(%edx)
-L(aligned_16_37bytes):
- movdqa %xmm0, -37(%edx)
-L(aligned_16_21bytes):
- movdqa %xmm0, -21(%edx)
-L(aligned_16_5bytes):
- movl %eax, -5(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_118bytes):
- movdqa %xmm0, -118(%edx)
-L(aligned_16_102bytes):
- movdqa %xmm0, -102(%edx)
-L(aligned_16_86bytes):
- movdqa %xmm0, -86(%edx)
-L(aligned_16_70bytes):
- movdqa %xmm0, -70(%edx)
-L(aligned_16_54bytes):
- movdqa %xmm0, -54(%edx)
-L(aligned_16_38bytes):
- movdqa %xmm0, -38(%edx)
-L(aligned_16_22bytes):
- movdqa %xmm0, -22(%edx)
-L(aligned_16_6bytes):
- movl %eax, -6(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_119bytes):
- movdqa %xmm0, -119(%edx)
-L(aligned_16_103bytes):
- movdqa %xmm0, -103(%edx)
-L(aligned_16_87bytes):
- movdqa %xmm0, -87(%edx)
-L(aligned_16_71bytes):
- movdqa %xmm0, -71(%edx)
-L(aligned_16_55bytes):
- movdqa %xmm0, -55(%edx)
-L(aligned_16_39bytes):
- movdqa %xmm0, -39(%edx)
-L(aligned_16_23bytes):
- movdqa %xmm0, -23(%edx)
-L(aligned_16_7bytes):
- movl %eax, -7(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_120bytes):
- movdqa %xmm0, -120(%edx)
-L(aligned_16_104bytes):
- movdqa %xmm0, -104(%edx)
-L(aligned_16_88bytes):
- movdqa %xmm0, -88(%edx)
-L(aligned_16_72bytes):
- movdqa %xmm0, -72(%edx)
-L(aligned_16_56bytes):
- movdqa %xmm0, -56(%edx)
-L(aligned_16_40bytes):
- movdqa %xmm0, -40(%edx)
-L(aligned_16_24bytes):
- movdqa %xmm0, -24(%edx)
-L(aligned_16_8bytes):
- movq %xmm0, -8(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_121bytes):
- movdqa %xmm0, -121(%edx)
-L(aligned_16_105bytes):
- movdqa %xmm0, -105(%edx)
-L(aligned_16_89bytes):
- movdqa %xmm0, -89(%edx)
-L(aligned_16_73bytes):
- movdqa %xmm0, -73(%edx)
-L(aligned_16_57bytes):
- movdqa %xmm0, -57(%edx)
-L(aligned_16_41bytes):
- movdqa %xmm0, -41(%edx)
-L(aligned_16_25bytes):
- movdqa %xmm0, -25(%edx)
-L(aligned_16_9bytes):
- movq %xmm0, -9(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_122bytes):
- movdqa %xmm0, -122(%edx)
-L(aligned_16_106bytes):
- movdqa %xmm0, -106(%edx)
-L(aligned_16_90bytes):
- movdqa %xmm0, -90(%edx)
-L(aligned_16_74bytes):
- movdqa %xmm0, -74(%edx)
-L(aligned_16_58bytes):
- movdqa %xmm0, -58(%edx)
-L(aligned_16_42bytes):
- movdqa %xmm0, -42(%edx)
-L(aligned_16_26bytes):
- movdqa %xmm0, -26(%edx)
-L(aligned_16_10bytes):
- movq %xmm0, -10(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_123bytes):
- movdqa %xmm0, -123(%edx)
-L(aligned_16_107bytes):
- movdqa %xmm0, -107(%edx)
-L(aligned_16_91bytes):
- movdqa %xmm0, -91(%edx)
-L(aligned_16_75bytes):
- movdqa %xmm0, -75(%edx)
-L(aligned_16_59bytes):
- movdqa %xmm0, -59(%edx)
-L(aligned_16_43bytes):
- movdqa %xmm0, -43(%edx)
-L(aligned_16_27bytes):
- movdqa %xmm0, -27(%edx)
-L(aligned_16_11bytes):
- movq %xmm0, -11(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_124bytes):
- movdqa %xmm0, -124(%edx)
-L(aligned_16_108bytes):
- movdqa %xmm0, -108(%edx)
-L(aligned_16_92bytes):
- movdqa %xmm0, -92(%edx)
-L(aligned_16_76bytes):
- movdqa %xmm0, -76(%edx)
-L(aligned_16_60bytes):
- movdqa %xmm0, -60(%edx)
-L(aligned_16_44bytes):
- movdqa %xmm0, -44(%edx)
-L(aligned_16_28bytes):
- movdqa %xmm0, -28(%edx)
-L(aligned_16_12bytes):
- movq %xmm0, -12(%edx)
- movl %eax, -4(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_125bytes):
- movdqa %xmm0, -125(%edx)
-L(aligned_16_109bytes):
- movdqa %xmm0, -109(%edx)
-L(aligned_16_93bytes):
- movdqa %xmm0, -93(%edx)
-L(aligned_16_77bytes):
- movdqa %xmm0, -77(%edx)
-L(aligned_16_61bytes):
- movdqa %xmm0, -61(%edx)
-L(aligned_16_45bytes):
- movdqa %xmm0, -45(%edx)
-L(aligned_16_29bytes):
- movdqa %xmm0, -29(%edx)
-L(aligned_16_13bytes):
- movq %xmm0, -13(%edx)
- movl %eax, -5(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_126bytes):
- movdqa %xmm0, -126(%edx)
-L(aligned_16_110bytes):
- movdqa %xmm0, -110(%edx)
-L(aligned_16_94bytes):
- movdqa %xmm0, -94(%edx)
-L(aligned_16_78bytes):
- movdqa %xmm0, -78(%edx)
-L(aligned_16_62bytes):
- movdqa %xmm0, -62(%edx)
-L(aligned_16_46bytes):
- movdqa %xmm0, -46(%edx)
-L(aligned_16_30bytes):
- movdqa %xmm0, -30(%edx)
-L(aligned_16_14bytes):
- movq %xmm0, -14(%edx)
- movl %eax, -6(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_127bytes):
- movdqa %xmm0, -127(%edx)
-L(aligned_16_111bytes):
- movdqa %xmm0, -111(%edx)
-L(aligned_16_95bytes):
- movdqa %xmm0, -95(%edx)
-L(aligned_16_79bytes):
- movdqa %xmm0, -79(%edx)
-L(aligned_16_63bytes):
- movdqa %xmm0, -63(%edx)
-L(aligned_16_47bytes):
- movdqa %xmm0, -47(%edx)
-L(aligned_16_31bytes):
- movdqa %xmm0, -31(%edx)
-L(aligned_16_15bytes):
- movq %xmm0, -15(%edx)
- movl %eax, -7(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN_END
-
-END (__memset_sse2_rep)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S
deleted file mode 100644
index d7b8be9114..0000000000
--- a/sysdeps/i386/i686/multiarch/memset-sse2.S
+++ /dev/null
@@ -1,860 +0,0 @@
-/* memset with SSE2
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_BZERO
-# define DEST PARMS
-# define LEN DEST+4
-# define SETRTNVAL
-#else
-# define DEST PARMS
-# define CHR DEST+4
-# define LEN CHR+4
-# define SETRTNVAL movl DEST(%esp), %eax
-#endif
-
-#ifdef SHARED
-# define ENTRANCE PUSH (%ebx);
-# define RETURN_END POP (%ebx); ret
-# define RETURN RETURN_END; CFI_PUSH (%ebx)
-# define PARMS 8 /* Preserve EBX. */
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
- /* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
- /* Get the address of the jump table. */ \
- add $(TABLE - .), %ebx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- add (%ebx,%ecx,4), %ebx; \
- add %ecx, %edx; \
- /* We loaded the jump table and adjusted EDX. Go. */ \
- jmp *%ebx
-#else
-# define ENTRANCE
-# define RETURN_END ret
-# define RETURN RETURN_END
-# define PARMS 4
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute offsets. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
- add %ecx, %edx; \
- jmp *TABLE(,%ecx,4)
-#endif
-
- .section .text.sse2,"ax",@progbits
-#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
-ENTRY (__memset_chk_sse2)
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memset_chk_sse2)
-#endif
-ENTRY (__memset_sse2)
- ENTRANCE
-
- movl LEN(%esp), %ecx
-#ifdef USE_AS_BZERO
- xor %eax, %eax
-#else
- movzbl CHR(%esp), %eax
- movb %al, %ah
- /* Fill the whole EAX with pattern. */
- movl %eax, %edx
- shl $16, %eax
- or %edx, %eax
-#endif
- movl DEST(%esp), %edx
- cmp $32, %ecx
- jae L(32bytesormore)
-
-L(write_less32bytes):
- BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
-
-
- .pushsection .rodata.sse2,"a",@progbits
- ALIGN (2)
-L(table_less_32bytes):
- .int JMPTBL (L(write_0bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_1bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_2bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_3bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_4bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_5bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_6bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_7bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_8bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_9bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_10bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_11bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_12bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_13bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_14bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_15bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_16bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_17bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_18bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_19bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_20bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_21bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_22bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_23bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_24bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_25bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_26bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_27bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_28bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_29bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_30bytes), L(table_less_32bytes))
- .int JMPTBL (L(write_31bytes), L(table_less_32bytes))
- .popsection
-
- ALIGN (4)
-L(write_28bytes):
- movl %eax, -28(%edx)
-L(write_24bytes):
- movl %eax, -24(%edx)
-L(write_20bytes):
- movl %eax, -20(%edx)
-L(write_16bytes):
- movl %eax, -16(%edx)
-L(write_12bytes):
- movl %eax, -12(%edx)
-L(write_8bytes):
- movl %eax, -8(%edx)
-L(write_4bytes):
- movl %eax, -4(%edx)
-L(write_0bytes):
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(write_29bytes):
- movl %eax, -29(%edx)
-L(write_25bytes):
- movl %eax, -25(%edx)
-L(write_21bytes):
- movl %eax, -21(%edx)
-L(write_17bytes):
- movl %eax, -17(%edx)
-L(write_13bytes):
- movl %eax, -13(%edx)
-L(write_9bytes):
- movl %eax, -9(%edx)
-L(write_5bytes):
- movl %eax, -5(%edx)
-L(write_1bytes):
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(write_30bytes):
- movl %eax, -30(%edx)
-L(write_26bytes):
- movl %eax, -26(%edx)
-L(write_22bytes):
- movl %eax, -22(%edx)
-L(write_18bytes):
- movl %eax, -18(%edx)
-L(write_14bytes):
- movl %eax, -14(%edx)
-L(write_10bytes):
- movl %eax, -10(%edx)
-L(write_6bytes):
- movl %eax, -6(%edx)
-L(write_2bytes):
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(write_31bytes):
- movl %eax, -31(%edx)
-L(write_27bytes):
- movl %eax, -27(%edx)
-L(write_23bytes):
- movl %eax, -23(%edx)
-L(write_19bytes):
- movl %eax, -19(%edx)
-L(write_15bytes):
- movl %eax, -15(%edx)
-L(write_11bytes):
- movl %eax, -11(%edx)
-L(write_7bytes):
- movl %eax, -7(%edx)
-L(write_3bytes):
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-/* ECX > 32 and EDX is 4 byte aligned. */
-L(32bytesormore):
- /* Fill xmm0 with the pattern. */
-#ifdef USE_AS_BZERO
- pxor %xmm0, %xmm0
-#else
- movd %eax, %xmm0
- pshufd $0, %xmm0, %xmm0
-#endif
- testl $0xf, %edx
- jz L(aligned_16)
-/* ECX > 32 and EDX is not 16 byte aligned. */
-L(not_aligned_16):
- movdqu %xmm0, (%edx)
- movl %edx, %eax
- and $-16, %edx
- add $16, %edx
- sub %edx, %eax
- add %eax, %ecx
- movd %xmm0, %eax
-
- ALIGN (4)
-L(aligned_16):
- cmp $128, %ecx
- jae L(128bytesormore)
-
-L(aligned_16_less128bytes):
- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
- ALIGN (4)
-L(128bytesormore):
-#ifdef SHARED_CACHE_SIZE
- PUSH (%ebx)
- mov $SHARED_CACHE_SIZE, %ebx
-#else
-# ifdef SHARED
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
-# else
- PUSH (%ebx)
- mov __x86_shared_cache_size, %ebx
-# endif
-#endif
- cmp %ebx, %ecx
- jae L(128bytesormore_nt_start)
-
-
-#ifdef DATA_CACHE_SIZE
- POP (%ebx)
-# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
- cmp $DATA_CACHE_SIZE, %ecx
-#else
-# ifdef SHARED
-# define RESTORE_EBX_STATE
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
-# else
- POP (%ebx)
-# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
- cmp __x86_data_cache_size, %ecx
-# endif
-#endif
-
- jae L(128bytes_L2_normal)
- subl $128, %ecx
-L(128bytesormore_normal):
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- lea 128(%edx), %edx
- jb L(128bytesless_normal)
-
-
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- lea 128(%edx), %edx
- jae L(128bytesormore_normal)
-
-L(128bytesless_normal):
- add $128, %ecx
- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
- ALIGN (4)
-L(128bytes_L2_normal):
- prefetcht0 0x380(%edx)
- prefetcht0 0x3c0(%edx)
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movaps %xmm0, 0x10(%edx)
- movaps %xmm0, 0x20(%edx)
- movaps %xmm0, 0x30(%edx)
- movaps %xmm0, 0x40(%edx)
- movaps %xmm0, 0x50(%edx)
- movaps %xmm0, 0x60(%edx)
- movaps %xmm0, 0x70(%edx)
- add $128, %edx
- cmp $128, %ecx
- jae L(128bytes_L2_normal)
-
-L(128bytesless_L2_normal):
- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
- RESTORE_EBX_STATE
-L(128bytesormore_nt_start):
- sub %ebx, %ecx
- ALIGN (4)
-L(128bytesormore_shared_cache_loop):
- prefetcht0 0x3c0(%edx)
- prefetcht0 0x380(%edx)
- sub $0x80, %ebx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- add $0x80, %edx
- cmp $0x80, %ebx
- jae L(128bytesormore_shared_cache_loop)
- cmp $0x80, %ecx
- jb L(shared_cache_loop_end)
- ALIGN (4)
-L(128bytesormore_nt):
- sub $0x80, %ecx
- movntdq %xmm0, (%edx)
- movntdq %xmm0, 0x10(%edx)
- movntdq %xmm0, 0x20(%edx)
- movntdq %xmm0, 0x30(%edx)
- movntdq %xmm0, 0x40(%edx)
- movntdq %xmm0, 0x50(%edx)
- movntdq %xmm0, 0x60(%edx)
- movntdq %xmm0, 0x70(%edx)
- add $0x80, %edx
- cmp $0x80, %ecx
- jae L(128bytesormore_nt)
- sfence
-L(shared_cache_loop_end):
-#if defined DATA_CACHE_SIZE || !defined SHARED
- POP (%ebx)
-#endif
- BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
-
-
- .pushsection .rodata.sse2,"a",@progbits
- ALIGN (2)
-L(table_16_128bytes):
- .int JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
- .int JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
- .popsection
-
- ALIGN (4)
-L(aligned_16_112bytes):
- movdqa %xmm0, -112(%edx)
-L(aligned_16_96bytes):
- movdqa %xmm0, -96(%edx)
-L(aligned_16_80bytes):
- movdqa %xmm0, -80(%edx)
-L(aligned_16_64bytes):
- movdqa %xmm0, -64(%edx)
-L(aligned_16_48bytes):
- movdqa %xmm0, -48(%edx)
-L(aligned_16_32bytes):
- movdqa %xmm0, -32(%edx)
-L(aligned_16_16bytes):
- movdqa %xmm0, -16(%edx)
-L(aligned_16_0bytes):
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_113bytes):
- movdqa %xmm0, -113(%edx)
-L(aligned_16_97bytes):
- movdqa %xmm0, -97(%edx)
-L(aligned_16_81bytes):
- movdqa %xmm0, -81(%edx)
-L(aligned_16_65bytes):
- movdqa %xmm0, -65(%edx)
-L(aligned_16_49bytes):
- movdqa %xmm0, -49(%edx)
-L(aligned_16_33bytes):
- movdqa %xmm0, -33(%edx)
-L(aligned_16_17bytes):
- movdqa %xmm0, -17(%edx)
-L(aligned_16_1bytes):
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_114bytes):
- movdqa %xmm0, -114(%edx)
-L(aligned_16_98bytes):
- movdqa %xmm0, -98(%edx)
-L(aligned_16_82bytes):
- movdqa %xmm0, -82(%edx)
-L(aligned_16_66bytes):
- movdqa %xmm0, -66(%edx)
-L(aligned_16_50bytes):
- movdqa %xmm0, -50(%edx)
-L(aligned_16_34bytes):
- movdqa %xmm0, -34(%edx)
-L(aligned_16_18bytes):
- movdqa %xmm0, -18(%edx)
-L(aligned_16_2bytes):
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_115bytes):
- movdqa %xmm0, -115(%edx)
-L(aligned_16_99bytes):
- movdqa %xmm0, -99(%edx)
-L(aligned_16_83bytes):
- movdqa %xmm0, -83(%edx)
-L(aligned_16_67bytes):
- movdqa %xmm0, -67(%edx)
-L(aligned_16_51bytes):
- movdqa %xmm0, -51(%edx)
-L(aligned_16_35bytes):
- movdqa %xmm0, -35(%edx)
-L(aligned_16_19bytes):
- movdqa %xmm0, -19(%edx)
-L(aligned_16_3bytes):
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_116bytes):
- movdqa %xmm0, -116(%edx)
-L(aligned_16_100bytes):
- movdqa %xmm0, -100(%edx)
-L(aligned_16_84bytes):
- movdqa %xmm0, -84(%edx)
-L(aligned_16_68bytes):
- movdqa %xmm0, -68(%edx)
-L(aligned_16_52bytes):
- movdqa %xmm0, -52(%edx)
-L(aligned_16_36bytes):
- movdqa %xmm0, -36(%edx)
-L(aligned_16_20bytes):
- movdqa %xmm0, -20(%edx)
-L(aligned_16_4bytes):
- movl %eax, -4(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_117bytes):
- movdqa %xmm0, -117(%edx)
-L(aligned_16_101bytes):
- movdqa %xmm0, -101(%edx)
-L(aligned_16_85bytes):
- movdqa %xmm0, -85(%edx)
-L(aligned_16_69bytes):
- movdqa %xmm0, -69(%edx)
-L(aligned_16_53bytes):
- movdqa %xmm0, -53(%edx)
-L(aligned_16_37bytes):
- movdqa %xmm0, -37(%edx)
-L(aligned_16_21bytes):
- movdqa %xmm0, -21(%edx)
-L(aligned_16_5bytes):
- movl %eax, -5(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_118bytes):
- movdqa %xmm0, -118(%edx)
-L(aligned_16_102bytes):
- movdqa %xmm0, -102(%edx)
-L(aligned_16_86bytes):
- movdqa %xmm0, -86(%edx)
-L(aligned_16_70bytes):
- movdqa %xmm0, -70(%edx)
-L(aligned_16_54bytes):
- movdqa %xmm0, -54(%edx)
-L(aligned_16_38bytes):
- movdqa %xmm0, -38(%edx)
-L(aligned_16_22bytes):
- movdqa %xmm0, -22(%edx)
-L(aligned_16_6bytes):
- movl %eax, -6(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_119bytes):
- movdqa %xmm0, -119(%edx)
-L(aligned_16_103bytes):
- movdqa %xmm0, -103(%edx)
-L(aligned_16_87bytes):
- movdqa %xmm0, -87(%edx)
-L(aligned_16_71bytes):
- movdqa %xmm0, -71(%edx)
-L(aligned_16_55bytes):
- movdqa %xmm0, -55(%edx)
-L(aligned_16_39bytes):
- movdqa %xmm0, -39(%edx)
-L(aligned_16_23bytes):
- movdqa %xmm0, -23(%edx)
-L(aligned_16_7bytes):
- movl %eax, -7(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_120bytes):
- movdqa %xmm0, -120(%edx)
-L(aligned_16_104bytes):
- movdqa %xmm0, -104(%edx)
-L(aligned_16_88bytes):
- movdqa %xmm0, -88(%edx)
-L(aligned_16_72bytes):
- movdqa %xmm0, -72(%edx)
-L(aligned_16_56bytes):
- movdqa %xmm0, -56(%edx)
-L(aligned_16_40bytes):
- movdqa %xmm0, -40(%edx)
-L(aligned_16_24bytes):
- movdqa %xmm0, -24(%edx)
-L(aligned_16_8bytes):
- movq %xmm0, -8(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_121bytes):
- movdqa %xmm0, -121(%edx)
-L(aligned_16_105bytes):
- movdqa %xmm0, -105(%edx)
-L(aligned_16_89bytes):
- movdqa %xmm0, -89(%edx)
-L(aligned_16_73bytes):
- movdqa %xmm0, -73(%edx)
-L(aligned_16_57bytes):
- movdqa %xmm0, -57(%edx)
-L(aligned_16_41bytes):
- movdqa %xmm0, -41(%edx)
-L(aligned_16_25bytes):
- movdqa %xmm0, -25(%edx)
-L(aligned_16_9bytes):
- movq %xmm0, -9(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_122bytes):
- movdqa %xmm0, -122(%edx)
-L(aligned_16_106bytes):
- movdqa %xmm0, -106(%edx)
-L(aligned_16_90bytes):
- movdqa %xmm0, -90(%edx)
-L(aligned_16_74bytes):
- movdqa %xmm0, -74(%edx)
-L(aligned_16_58bytes):
- movdqa %xmm0, -58(%edx)
-L(aligned_16_42bytes):
- movdqa %xmm0, -42(%edx)
-L(aligned_16_26bytes):
- movdqa %xmm0, -26(%edx)
-L(aligned_16_10bytes):
- movq %xmm0, -10(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_123bytes):
- movdqa %xmm0, -123(%edx)
-L(aligned_16_107bytes):
- movdqa %xmm0, -107(%edx)
-L(aligned_16_91bytes):
- movdqa %xmm0, -91(%edx)
-L(aligned_16_75bytes):
- movdqa %xmm0, -75(%edx)
-L(aligned_16_59bytes):
- movdqa %xmm0, -59(%edx)
-L(aligned_16_43bytes):
- movdqa %xmm0, -43(%edx)
-L(aligned_16_27bytes):
- movdqa %xmm0, -27(%edx)
-L(aligned_16_11bytes):
- movq %xmm0, -11(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_124bytes):
- movdqa %xmm0, -124(%edx)
-L(aligned_16_108bytes):
- movdqa %xmm0, -108(%edx)
-L(aligned_16_92bytes):
- movdqa %xmm0, -92(%edx)
-L(aligned_16_76bytes):
- movdqa %xmm0, -76(%edx)
-L(aligned_16_60bytes):
- movdqa %xmm0, -60(%edx)
-L(aligned_16_44bytes):
- movdqa %xmm0, -44(%edx)
-L(aligned_16_28bytes):
- movdqa %xmm0, -28(%edx)
-L(aligned_16_12bytes):
- movq %xmm0, -12(%edx)
- movl %eax, -4(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_125bytes):
- movdqa %xmm0, -125(%edx)
-L(aligned_16_109bytes):
- movdqa %xmm0, -109(%edx)
-L(aligned_16_93bytes):
- movdqa %xmm0, -93(%edx)
-L(aligned_16_77bytes):
- movdqa %xmm0, -77(%edx)
-L(aligned_16_61bytes):
- movdqa %xmm0, -61(%edx)
-L(aligned_16_45bytes):
- movdqa %xmm0, -45(%edx)
-L(aligned_16_29bytes):
- movdqa %xmm0, -29(%edx)
-L(aligned_16_13bytes):
- movq %xmm0, -13(%edx)
- movl %eax, -5(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_126bytes):
- movdqa %xmm0, -126(%edx)
-L(aligned_16_110bytes):
- movdqa %xmm0, -110(%edx)
-L(aligned_16_94bytes):
- movdqa %xmm0, -94(%edx)
-L(aligned_16_78bytes):
- movdqa %xmm0, -78(%edx)
-L(aligned_16_62bytes):
- movdqa %xmm0, -62(%edx)
-L(aligned_16_46bytes):
- movdqa %xmm0, -46(%edx)
-L(aligned_16_30bytes):
- movdqa %xmm0, -30(%edx)
-L(aligned_16_14bytes):
- movq %xmm0, -14(%edx)
- movl %eax, -6(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN (4)
-L(aligned_16_127bytes):
- movdqa %xmm0, -127(%edx)
-L(aligned_16_111bytes):
- movdqa %xmm0, -111(%edx)
-L(aligned_16_95bytes):
- movdqa %xmm0, -95(%edx)
-L(aligned_16_79bytes):
- movdqa %xmm0, -79(%edx)
-L(aligned_16_63bytes):
- movdqa %xmm0, -63(%edx)
-L(aligned_16_47bytes):
- movdqa %xmm0, -47(%edx)
-L(aligned_16_31bytes):
- movdqa %xmm0, -31(%edx)
-L(aligned_16_15bytes):
- movq %xmm0, -15(%edx)
- movl %eax, -7(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN_END
-
-END (__memset_sse2)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S
deleted file mode 100644
index f601663a9f..0000000000
--- a/sysdeps/i386/i686/multiarch/memset.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Multiple versions of memset
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
- .text
-ENTRY(memset)
- .type memset, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__memset_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__memset_sse2)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f
- LOAD_FUNC_GOT_EAX (__memset_sse2_rep)
-2: ret
-END(memset)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __memset_ia32, @function; \
- .globl __memset_ia32; \
- .p2align 4; \
- __memset_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __memset_ia32, .-__memset_ia32
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
- .type __memset_chk_ia32, @function; \
- .globl __memset_chk_ia32; \
- .p2align 4; \
- __memset_chk_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
- cfi_endproc; .size __memset_chk_ia32, .-__memset_chk_ia32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_memset; __GI_memset = __memset_ia32
-# endif
-
-# undef strong_alias
-# define strong_alias(original, alias)
-#endif
-
-#include "../memset.S"
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S
deleted file mode 100644
index 573cf4208a..0000000000
--- a/sysdeps/i386/i686/multiarch/memset_chk.S
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Multiple versions of __memset_chk
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib. */
-#if IS_IN (libc)
- .text
-ENTRY(__memset_chk)
- .type __memset_chk, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__memset_chk_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__memset_chk_sse2)
- HAS_ARCH_FEATURE (Fast_Rep_String)
- jz 2f
- LOAD_FUNC_GOT_EAX (__memset_chk_sse2_rep)
-2: ret
-END(__memset_chk)
-
-# ifdef SHARED
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
- .section .gnu.warning.__memset_zero_constant_len_parameter
- .string "memset used with constant zero length parameter; this could be due to transposed parameters"
-# else
- .text
- .type __memset_chk_sse2, @function
- .p2align 4;
-__memset_chk_sse2:
- cfi_startproc
- CALL_MCOUNT
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb __chk_fail
- jmp __memset_sse2
- cfi_endproc
- .size __memset_chk_sse2, .-__memset_chk_sse2
-
- .type __memset_chk_sse2_rep, @function
- .p2align 4;
-__memset_chk_sse2_rep:
- cfi_startproc
- CALL_MCOUNT
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb __chk_fail
- jmp __memset_sse2_rep
- cfi_endproc
- .size __memset_chk_sse2_rep, .-__memset_chk_sse2_rep
-
- .type __memset_chk_ia32, @function
- .p2align 4;
-__memset_chk_ia32:
- cfi_startproc
- CALL_MCOUNT
- movl 12(%esp), %eax
- cmpl %eax, 16(%esp)
- jb __chk_fail
- jmp __memset_ia32
- cfi_endproc
- .size __memset_chk_ia32, .-__memset_chk_ia32
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S
deleted file mode 100644
index 88c0e5776c..0000000000
--- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2-bsf.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_RAWMEMCHR
-#define MEMCHR __rawmemchr_sse2_bsf
-#include "memchr-sse2-bsf.S"
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S b/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S
deleted file mode 100644
index 038c74896b..0000000000
--- a/sysdeps/i386/i686/multiarch/rawmemchr-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_RAWMEMCHR
-#define MEMCHR __rawmemchr_sse2
-#include "memchr-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/rawmemchr.S b/sysdeps/i386/i686/multiarch/rawmemchr.S
deleted file mode 100644
index 0a41d63ee8..0000000000
--- a/sysdeps/i386/i686/multiarch/rawmemchr.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Multiple versions of rawmemchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(__rawmemchr)
- .type __rawmemchr, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- HAS_ARCH_FEATURE (Slow_BSF)
- jz 3f
-
- LOAD_FUNC_GOT_EAX (__rawmemchr_sse2)
- ret
-
-2: LOAD_FUNC_GOT_EAX (__rawmemchr_ia32)
- ret
-
-3: LOAD_FUNC_GOT_EAX (__rawmemchr_sse2_bsf)
- ret
-END(__rawmemchr)
-
-weak_alias(__rawmemchr, rawmemchr)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __rawmemchr_ia32, @function; \
- .globl __rawmemchr_ia32; \
- .p2align 4; \
- __rawmemchr_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __rawmemchr_ia32, .-__rawmemchr_ia32
-
-# undef libc_hidden_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_def(name) \
- .globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_ia32
-
-#endif
-#include "../../rawmemchr.S"
diff --git a/sysdeps/i386/i686/multiarch/rtld-strnlen.c b/sysdeps/i386/i686/multiarch/rtld-strnlen.c
deleted file mode 100644
index 1aa5440644..0000000000
--- a/sysdeps/i386/i686/multiarch/rtld-strnlen.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <string/strnlen.c>
diff --git a/sysdeps/i386/i686/multiarch/s_fma-fma.c b/sysdeps/i386/i686/multiarch/s_fma-fma.c
deleted file mode 100644
index 2e9619f97c..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fma-fma.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* FMA version of fma.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-double
-__fma_fma (double x, double y, double z)
-{
- asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
- return x;
-}
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c
deleted file mode 100644
index 411ebb2ba9..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Multiple versions of fma.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#include <math.h>
-#include <init-arch.h>
-
-extern double __fma_ia32 (double x, double y, double z) attribute_hidden;
-extern double __fma_fma (double x, double y, double z) attribute_hidden;
-
-libm_ifunc (__fma,
- HAS_ARCH_FEATURE (FMA_Usable) ? __fma_fma : __fma_ia32);
-weak_alias (__fma, fma)
-
-#define __fma __fma_ia32
-
-#include <sysdeps/ieee754/ldbl-96/s_fma.c>
diff --git a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c b/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
deleted file mode 100644
index ee57abfda2..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fmaf-fma.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* FMA version of fmaf.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-float
-__fmaf_fma (float x, float y, float z)
-{
- asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
- return x;
-}
diff --git a/sysdeps/i386/i686/multiarch/s_fmaf.c b/sysdeps/i386/i686/multiarch/s_fmaf.c
deleted file mode 100644
index 00b0fbcfc5..0000000000
--- a/sysdeps/i386/i686/multiarch/s_fmaf.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Multiple versions of fmaf.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#include <math.h>
-#include <init-arch.h>
-
-extern float __fmaf_ia32 (float x, float y, float z) attribute_hidden;
-extern float __fmaf_fma (float x, float y, float z) attribute_hidden;
-
-libm_ifunc (__fmaf,
- HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma : __fmaf_ia32);
-weak_alias (__fmaf, fmaf)
-
-#define __fmaf __fmaf_ia32
-
-#include <sysdeps/ieee754/dbl-64/s_fmaf.c>
diff --git a/sysdeps/i386/i686/multiarch/sched_cpucount.c b/sysdeps/i386/i686/multiarch/sched_cpucount.c
deleted file mode 100644
index 7db31b02f8..0000000000
--- a/sysdeps/i386/i686/multiarch/sched_cpucount.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/sched_cpucount.c>
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S b/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
deleted file mode 100644
index 46ca1b3074..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
deleted file mode 100644
index d971c2da38..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/stpcpy.S b/sysdeps/i386/i686/multiarch/stpcpy.S
deleted file mode 100644
index ee81ab6ae3..0000000000
--- a/sysdeps/i386/i686/multiarch/stpcpy.S
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Multiple versions of stpcpy
- All versions must be listed in ifunc-impl-list.c. */
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy
-#include "strcpy.S"
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S b/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
deleted file mode 100644
index 37a703cb76..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy-sse2.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#define STRCPY __stpncpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S b/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
deleted file mode 100644
index 14ed16f6b5..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#define STRCPY __stpncpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/stpncpy.S b/sysdeps/i386/i686/multiarch/stpncpy.S
deleted file mode 100644
index 2698ca6a8c..0000000000
--- a/sysdeps/i386/i686/multiarch/stpncpy.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of stpncpy
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCPY __stpncpy
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#include "strcpy.S"
-
-weak_alias (__stpncpy, stpncpy)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp-c.c b/sysdeps/i386/i686/multiarch/strcasecmp-c.c
deleted file mode 100644
index 753c6ec84a..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp-c.c
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <string.h>
-
-extern __typeof (strcasecmp) __strcasecmp_nonascii;
-
-#define __strcasecmp __strcasecmp_nonascii
-#include <string/strcasecmp.c>
-
-strong_alias (__strcasecmp_nonascii, __strcasecmp_ia32)
-
-/* The needs of strcasecmp in libc are minimal, no need to go through
- the IFUNC. */
-strong_alias (__strcasecmp_nonascii, __GI___strcasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
deleted file mode 100644
index ec59276408..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Entry point for multi-version x86 strcasecmp.
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY(__strcasecmp)
- .type __strcasecmp, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__strcasecmp_ia32)
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strcasecmp_ssse3)
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f
- HAS_ARCH_FEATURE (Slow_SSE4_2)
- jnz 2f
- LOAD_FUNC_GOT_EAX (__strcasecmp_sse4_2)
-2: ret
-END(__strcasecmp)
-
-weak_alias (__strcasecmp, strcasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c b/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c
deleted file mode 100644
index d4fcd2b4a1..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l-c.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <string.h>
-
-extern __typeof (strcasecmp_l) __strcasecmp_l_nonascii;
-
-#define __strcasecmp_l __strcasecmp_l_nonascii
-#define USE_IN_EXTENDED_LOCALE_MODEL 1
-#include <string/strcasecmp.c>
-
-strong_alias (__strcasecmp_l_nonascii, __strcasecmp_l_ia32)
-
-/* The needs of strcasecmp in libc are minimal, no need to go through
- the IFUNC. */
-strong_alias (__strcasecmp_l_nonascii, __GI___strcasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
deleted file mode 100644
index 411d4153f2..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l-sse4.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRCASECMP_L 1
-#include "strcmp-sse4.S"
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S b/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S
deleted file mode 100644
index a22b93c518..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l-ssse3.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRCASECMP_L 1
-#include "strcmp-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strcasecmp_l.S b/sysdeps/i386/i686/multiarch/strcasecmp_l.S
deleted file mode 100644
index 711c09b0dc..0000000000
--- a/sysdeps/i386/i686/multiarch/strcasecmp_l.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* Multiple versions of strcasecmp_l
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCMP __strcasecmp_l
-#define USE_AS_STRCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strcasecmp_l, strcasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
deleted file mode 100644
index 6359c7330c..0000000000
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ /dev/null
@@ -1,1245 +0,0 @@
-/* strcat with SSE2
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifdef SHARED
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into ECX and branch to it. TABLE is a
- jump table with relative offsets. INDEX is a register contains the
- index into the jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- /* We first load PC into ECX. */ \
- SETUP_PIC_REG(cx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ecx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ecx,INDEX,SCALE), %ecx; \
- /* We loaded the jump table and adjusted ECX. Go. */ \
- jmp *%ecx
-# else
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute offsets. INDEX is a register contains the index into the
- jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-# endif
-
-# ifndef STRCAT
-# define STRCAT __strcat_sse2
-# endif
-
-# define PARMS 4
-# define STR1 PARMS+4
-# define STR2 STR1+4
-
-# ifdef USE_AS_STRNCAT
-# define LEN STR2+8
-# define STR3 STR1+4
-# else
-# define STR3 STR1
-# endif
-
-# define USE_AS_STRCAT
-# ifdef USE_AS_STRNCAT
-# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
-# else
-# define RETURN POP(%esi); ret; CFI_PUSH(%esi);
-# endif
-
-.text
-ENTRY (STRCAT)
- PUSH (%esi)
- mov STR1(%esp), %eax
- mov STR2(%esp), %esi
-# ifdef USE_AS_STRNCAT
- PUSH (%ebx)
- movl LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(ExitZero)
-# endif
- cmpb $0, (%esi)
- mov %esi, %ecx
- mov %eax, %edx
- jz L(ExitZero)
-
- and $63, %ecx
- and $63, %edx
- cmp $32, %ecx
- ja L(StrlenCore7_1)
- cmp $48, %edx
- ja L(alignment_prolog)
-
- pxor %xmm0, %xmm0
- pxor %xmm4, %xmm4
- pxor %xmm7, %xmm7
- movdqu (%eax), %xmm1
- movdqu (%esi), %xmm5
- pcmpeqb %xmm1, %xmm0
- movdqu 16(%esi), %xmm6
- pmovmskb %xmm0, %ecx
- pcmpeqb %xmm5, %xmm4
- pcmpeqb %xmm6, %xmm7
- test %ecx, %ecx
- jnz L(exit_less16_)
- mov %eax, %ecx
- and $-16, %eax
- jmp L(loop_prolog)
-
-L(alignment_prolog):
- pxor %xmm0, %xmm0
- pxor %xmm4, %xmm4
- mov %edx, %ecx
- pxor %xmm7, %xmm7
- and $15, %ecx
- and $-16, %eax
- pcmpeqb (%eax), %xmm0
- movdqu (%esi), %xmm5
- movdqu 16(%esi), %xmm6
- pmovmskb %xmm0, %edx
- pcmpeqb %xmm5, %xmm4
- shr %cl, %edx
- pcmpeqb %xmm6, %xmm7
- test %edx, %edx
- jnz L(exit_less16)
- add %eax, %ecx
-
- pxor %xmm0, %xmm0
-L(loop_prolog):
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
- .p2align 4
-L(align16_loop):
- pcmpeqb 16(%eax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(exit16)
-
- pcmpeqb 32(%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- jnz L(exit32)
-
- pcmpeqb 48(%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- jnz L(exit48)
-
- pcmpeqb 64(%eax), %xmm3
- pmovmskb %xmm3, %edx
- lea 64(%eax), %eax
- test %edx, %edx
- jz L(align16_loop)
- bsf %edx, %edx
- add %edx, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit16):
- bsf %edx, %edx
- lea 16(%eax, %edx), %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit32):
- bsf %edx, %edx
- lea 32(%eax, %edx), %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit48):
- bsf %edx, %edx
- lea 48(%eax, %edx), %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_less16):
- bsf %edx, %edx
- add %ecx, %eax
- add %edx, %eax
- jmp L(StartStrcpyPart)
-
- .p2align 4
-L(exit_less16_):
- bsf %ecx, %ecx
- add %ecx, %eax
-
- .p2align 4
-L(StartStrcpyPart):
- pmovmskb %xmm4, %edx
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1)
-
- movdqu %xmm5, (%eax)
- pmovmskb %xmm7, %edx
-# ifdef USE_AS_STRNCAT
- cmp $32, %ebx
- jbe L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes1)
-
- mov %esi, %ecx
- and $-16, %esi
- and $15, %ecx
- pxor %xmm0, %xmm0
-# ifdef USE_AS_STRNCAT
- add %ecx, %ebx
- sbb %edx, %edx
- or %edx, %ebx
-# endif
- sub %ecx, %eax
- jmp L(Unalign16Both)
-
-L(StrlenCore7_1):
- mov %eax, %ecx
- pxor %xmm0, %xmm0
- and $15, %ecx
- and $-16, %eax
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- shr %cl, %edx
- test %edx, %edx
- jnz L(exit_less16_1)
- add %eax, %ecx
-
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
-
- .p2align 4
-L(align16_loop_1):
- pcmpeqb 16(%eax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(exit16_1)
-
- pcmpeqb 32(%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- jnz L(exit32_1)
-
- pcmpeqb 48(%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- jnz L(exit48_1)
-
- pcmpeqb 64(%eax), %xmm3
- pmovmskb %xmm3, %edx
- lea 64(%eax), %eax
- test %edx, %edx
- jz L(align16_loop_1)
- bsf %edx, %edx
- add %edx, %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit16_1):
- bsf %edx, %edx
- lea 16(%eax, %edx), %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit32_1):
- bsf %edx, %edx
- lea 32(%eax, %edx), %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit48_1):
- bsf %edx, %edx
- lea 48(%eax, %edx), %eax
- jmp L(StartStrcpyPart_1)
-
- .p2align 4
-L(exit_less16_1):
- bsf %edx, %edx
- add %ecx, %eax
- add %edx, %eax
-
- .p2align 4
-L(StartStrcpyPart_1):
- mov %esi, %ecx
- and $15, %ecx
- and $-16, %esi
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
-
-# ifdef USE_AS_STRNCAT
- cmp $48, %ebx
- ja L(BigN)
-# endif
- pcmpeqb (%esi), %xmm1
-# ifdef USE_AS_STRNCAT
- add %ecx, %ebx
-# endif
- pmovmskb %xmm1, %edx
- shr %cl, %edx
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail)
-
- pcmpeqb 16(%esi), %xmm0
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STRNCAT
- cmp $32, %ebx
- jbe L(CopyFrom1To32BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes)
-
- movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
- movdqu %xmm1, (%eax)
- sub %ecx, %eax
-
- .p2align 4
-L(Unalign16Both):
- mov $16, %ecx
- movdqa (%esi, %ecx), %xmm1
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%eax, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $48, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-L(Unalign16BothBigN):
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%eax, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm4
- movdqu %xmm3, (%eax, %ecx)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm1
- movdqu %xmm4, (%eax, %ecx)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%eax, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%eax, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
-# ifdef USE_AS_STRNCAT
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- movdqu %xmm3, (%eax, %ecx)
- mov %esi, %edx
- lea 16(%esi, %ecx), %esi
- and $-0x40, %esi
- sub %esi, %edx
- sub %edx, %eax
-# ifdef USE_AS_STRNCAT
- lea 128(%ebx, %edx), %ebx
-# endif
- movaps (%esi), %xmm2
- movaps %xmm2, %xmm4
- movaps 16(%esi), %xmm5
- movaps 32(%esi), %xmm3
- movaps %xmm3, %xmm6
- movaps 48(%esi), %xmm7
- pminub %xmm5, %xmm2
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(Unaligned64Leave)
-
- .p2align 4
-L(Unaligned64Loop_start):
- add $64, %eax
- add $64, %esi
- movdqu %xmm4, -64(%eax)
- movaps (%esi), %xmm2
- movdqa %xmm2, %xmm4
- movdqu %xmm5, -48(%eax)
- movaps 16(%esi), %xmm5
- pminub %xmm5, %xmm2
- movaps 32(%esi), %xmm3
- movdqu %xmm6, -32(%eax)
- movaps %xmm3, %xmm6
- movdqu %xmm7, -16(%eax)
- movaps 48(%esi), %xmm7
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
-# ifdef USE_AS_STRNCAT
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3)
-# endif
- test %edx, %edx
- jz L(Unaligned64Loop_start)
-
-L(Unaligned64Leave):
- pxor %xmm1, %xmm1
-
- pcmpeqb %xmm4, %xmm0
- pcmpeqb %xmm5, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_0)
- test %ecx, %ecx
- jnz L(CopyFrom1To16BytesUnaligned_16)
-
- pcmpeqb %xmm6, %xmm0
- pcmpeqb %xmm7, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_32)
-
- bsf %ecx, %edx
- movdqu %xmm4, (%eax)
- movdqu %xmm5, 16(%eax)
- movdqu %xmm6, 32(%eax)
- add $48, %esi
- add $48, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
- .p2align 4
-L(BigN):
- pcmpeqb (%esi), %xmm1
- pmovmskb %xmm1, %edx
- shr %cl, %edx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail)
-
- pcmpeqb 16(%esi), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes)
-
- movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
- movdqu %xmm1, (%eax)
- sub %ecx, %eax
- sub $48, %ebx
- add %ecx, %ebx
-
- mov $16, %ecx
- movdqa (%esi, %ecx), %xmm1
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%eax, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
- jmp L(Unalign16BothBigN)
-# endif
-
-/*------------end of main part-------------------------------*/
-
-/* Case1 */
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %ecx, %eax
- add %ecx, %esi
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTail):
- add %ecx, %esi
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1):
- add $16, %esi
- add $16, %eax
-L(CopyFrom1To16BytesTail1):
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes):
- bsf %edx, %edx
- add %ecx, %esi
- add $16, %edx
- sub %ecx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_0):
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_16):
- bsf %ecx, %edx
- movdqu %xmm4, (%eax)
- add $16, %esi
- add $16, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_32):
- bsf %edx, %edx
- movdqu %xmm4, (%eax)
- movdqu %xmm5, 16(%eax)
- add $32, %esi
- add $32, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-# ifdef USE_AS_STRNCAT
-
- .p2align 4
-L(CopyFrom1To16BytesExit):
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2 */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %ebx
- add %ecx, %eax
- add %ecx, %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- add $16, %edx
- sub %ecx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-/* Case2 or Case3, Case3 */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesCase2)
-L(CopyFrom1To16BytesCase3):
- add $16, %ebx
- add %ecx, %eax
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To32BytesCase2)
- sub %ecx, %ebx
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTailCase2)
- sub %ecx, %ebx
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
- add $16, %eax
- add $16, %esi
- sub $16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1Case2)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-
-# endif
-
-# ifdef USE_AS_STRNCAT
- .p2align 4
-L(StrncatExit0):
- movb %bh, (%eax)
- mov STR3(%esp), %eax
- RETURN
-# endif
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit1):
- movb %bh, 1(%eax)
-# endif
-L(Exit1):
-# ifdef USE_AS_STRNCAT
- movb (%esi), %dh
-# endif
- movb %dh, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit2):
- movb %bh, 2(%eax)
-# endif
-L(Exit2):
- movw (%esi), %dx
- movw %dx, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit3):
- movb %bh, 3(%eax)
-# endif
-L(Exit3):
- movw (%esi), %cx
- movw %cx, (%eax)
-# ifdef USE_AS_STRNCAT
- movb 2(%esi), %dh
-# endif
- movb %dh, 2(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit4):
- movb %bh, 4(%eax)
-# endif
-L(Exit4):
- movl (%esi), %edx
- movl %edx, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit5):
- movb %bh, 5(%eax)
-# endif
-L(Exit5):
- movl (%esi), %ecx
-# ifdef USE_AS_STRNCAT
- movb 4(%esi), %dh
-# endif
- movb %dh, 4(%eax)
- movl %ecx, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit6):
- movb %bh, 6(%eax)
-# endif
-L(Exit6):
- movl (%esi), %ecx
- movw 4(%esi), %dx
- movl %ecx, (%eax)
- movw %dx, 4(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit7):
- movb %bh, 7(%eax)
-# endif
-L(Exit7):
- movl (%esi), %ecx
- movl 3(%esi), %edx
- movl %ecx, (%eax)
- movl %edx, 3(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit8):
- movb %bh, 8(%eax)
-# endif
-L(Exit8):
- movlpd (%esi), %xmm0
- movlpd %xmm0, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit9):
- movb %bh, 9(%eax)
-# endif
-L(Exit9):
- movlpd (%esi), %xmm0
-# ifdef USE_AS_STRNCAT
- movb 8(%esi), %dh
-# endif
- movb %dh, 8(%eax)
- movlpd %xmm0, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit10):
- movb %bh, 10(%eax)
-# endif
-L(Exit10):
- movlpd (%esi), %xmm0
- movw 8(%esi), %dx
- movlpd %xmm0, (%eax)
- movw %dx, 8(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit11):
- movb %bh, 11(%eax)
-# endif
-L(Exit11):
- movlpd (%esi), %xmm0
- movl 7(%esi), %edx
- movlpd %xmm0, (%eax)
- movl %edx, 7(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit12):
- movb %bh, 12(%eax)
-# endif
-L(Exit12):
- movlpd (%esi), %xmm0
- movl 8(%esi), %edx
- movlpd %xmm0, (%eax)
- movl %edx, 8(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit13):
- movb %bh, 13(%eax)
-# endif
-L(Exit13):
- movlpd (%esi), %xmm0
- movlpd 5(%esi), %xmm1
- movlpd %xmm0, (%eax)
- movlpd %xmm1, 5(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit14):
- movb %bh, 14(%eax)
-# endif
-L(Exit14):
- movlpd (%esi), %xmm0
- movlpd 6(%esi), %xmm1
- movlpd %xmm0, (%eax)
- movlpd %xmm1, 6(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit15):
- movb %bh, 15(%eax)
-# endif
-L(Exit15):
- movlpd (%esi), %xmm0
- movlpd 7(%esi), %xmm1
- movlpd %xmm0, (%eax)
- movlpd %xmm1, 7(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit16):
- movb %bh, 16(%eax)
-# endif
-L(Exit16):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit17):
- movb %bh, 17(%eax)
-# endif
-L(Exit17):
- movdqu (%esi), %xmm0
-# ifdef USE_AS_STRNCAT
- movb 16(%esi), %dh
-# endif
- movdqu %xmm0, (%eax)
- movb %dh, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit18):
- movb %bh, 18(%eax)
-# endif
-L(Exit18):
- movdqu (%esi), %xmm0
- movw 16(%esi), %cx
- movdqu %xmm0, (%eax)
- movw %cx, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit19):
- movb %bh, 19(%eax)
-# endif
-L(Exit19):
- movdqu (%esi), %xmm0
- movl 15(%esi), %ecx
- movdqu %xmm0, (%eax)
- movl %ecx, 15(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit20):
- movb %bh, 20(%eax)
-# endif
-L(Exit20):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%eax)
- movl %ecx, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit21):
- movb %bh, 21(%eax)
-# endif
-L(Exit21):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
-# ifdef USE_AS_STRNCAT
- movb 20(%esi), %dh
-# endif
- movdqu %xmm0, (%eax)
- movl %ecx, 16(%eax)
- movb %dh, 20(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit22):
- movb %bh, 22(%eax)
-# endif
-L(Exit22):
- movdqu (%esi), %xmm0
- movlpd 14(%esi), %xmm3
- movdqu %xmm0, (%eax)
- movlpd %xmm3, 14(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit23):
- movb %bh, 23(%eax)
-# endif
-L(Exit23):
- movdqu (%esi), %xmm0
- movlpd 15(%esi), %xmm3
- movdqu %xmm0, (%eax)
- movlpd %xmm3, 15(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit24):
- movb %bh, 24(%eax)
-# endif
-L(Exit24):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit25):
- movb %bh, 25(%eax)
-# endif
-L(Exit25):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
-# ifdef USE_AS_STRNCAT
- movb 24(%esi), %dh
-# endif
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movb %dh, 24(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit26):
- movb %bh, 26(%eax)
-# endif
-L(Exit26):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movw 24(%esi), %cx
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movw %cx, 24(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit27):
- movb %bh, 27(%eax)
-# endif
-L(Exit27):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 23(%esi), %ecx
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movl %ecx, 23(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit28):
- movb %bh, 28(%eax)
-# endif
-L(Exit28):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 24(%esi), %ecx
- movdqu %xmm0, (%eax)
- movlpd %xmm2, 16(%eax)
- movl %ecx, 24(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit29):
- movb %bh, 29(%eax)
-# endif
-L(Exit29):
- movdqu (%esi), %xmm0
- movdqu 13(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 13(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit30):
- movb %bh, 30(%eax)
-# endif
-L(Exit30):
- movdqu (%esi), %xmm0
- movdqu 14(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 14(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit31):
- movb %bh, 31(%eax)
-# endif
-L(Exit31):
- movdqu (%esi), %xmm0
- movdqu 15(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 15(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-# ifdef USE_AS_STRNCAT
-L(StrncatExit32):
- movb %bh, 32(%eax)
-# endif
-L(Exit32):
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movdqu %xmm0, (%eax)
- movdqu %xmm2, 16(%eax)
- mov STR3(%esp), %eax
- RETURN
-
-# ifdef USE_AS_STRNCAT
-
- .p2align 4
-L(UnalignedLeaveCase2OrCase3):
- test %edx, %edx
- jnz L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3):
- lea 64(%ebx), %ecx
- and $-16, %ecx
- add $48, %ebx
- jl L(CopyFrom1To16BytesCase3)
- movdqu %xmm4, (%eax)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm5, 16(%eax)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm6, 32(%eax)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm7, 48(%eax)
- xor %bh, %bh
- movb %bh, 64(%eax)
- mov STR3(%esp), %eax
- RETURN
-
- .p2align 4
-L(Unaligned64LeaveCase2):
- xor %ecx, %ecx
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $48, %ebx
- jle L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm4, (%eax)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm5, 16(%eax)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm6, 32(%eax)
- lea 16(%eax, %ecx), %eax
- lea 16(%esi, %ecx), %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
-# endif
- .p2align 4
-L(ExitZero):
- RETURN
-
-END (STRCAT)
-
- .p2align 4
- .section .rodata
-L(ExitTable):
- .int JMPTBL(L(Exit1), L(ExitTable))
- .int JMPTBL(L(Exit2), L(ExitTable))
- .int JMPTBL(L(Exit3), L(ExitTable))
- .int JMPTBL(L(Exit4), L(ExitTable))
- .int JMPTBL(L(Exit5), L(ExitTable))
- .int JMPTBL(L(Exit6), L(ExitTable))
- .int JMPTBL(L(Exit7), L(ExitTable))
- .int JMPTBL(L(Exit8), L(ExitTable))
- .int JMPTBL(L(Exit9), L(ExitTable))
- .int JMPTBL(L(Exit10), L(ExitTable))
- .int JMPTBL(L(Exit11), L(ExitTable))
- .int JMPTBL(L(Exit12), L(ExitTable))
- .int JMPTBL(L(Exit13), L(ExitTable))
- .int JMPTBL(L(Exit14), L(ExitTable))
- .int JMPTBL(L(Exit15), L(ExitTable))
- .int JMPTBL(L(Exit16), L(ExitTable))
- .int JMPTBL(L(Exit17), L(ExitTable))
- .int JMPTBL(L(Exit18), L(ExitTable))
- .int JMPTBL(L(Exit19), L(ExitTable))
- .int JMPTBL(L(Exit20), L(ExitTable))
- .int JMPTBL(L(Exit21), L(ExitTable))
- .int JMPTBL(L(Exit22), L(ExitTable))
- .int JMPTBL(L(Exit23), L(ExitTable))
- .int JMPTBL(L(Exit24), L(ExitTable))
- .int JMPTBL(L(Exit25), L(ExitTable))
- .int JMPTBL(L(Exit26), L(ExitTable))
- .int JMPTBL(L(Exit27), L(ExitTable))
- .int JMPTBL(L(Exit28), L(ExitTable))
- .int JMPTBL(L(Exit29), L(ExitTable))
- .int JMPTBL(L(Exit30), L(ExitTable))
- .int JMPTBL(L(Exit31), L(ExitTable))
- .int JMPTBL(L(Exit32), L(ExitTable))
-# ifdef USE_AS_STRNCAT
-L(ExitStrncatTable):
- .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
- .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/sysdeps/i386/i686/multiarch/strcat-ssse3.S
deleted file mode 100644
index 59ffbc60a5..0000000000
--- a/sysdeps/i386/i686/multiarch/strcat-ssse3.S
+++ /dev/null
@@ -1,572 +0,0 @@
-/* strcat with SSSE3
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCAT
-# define STRCAT __strcat_ssse3
-# endif
-
-# define PARMS 4
-# define STR1 PARMS+4
-# define STR2 STR1+4
-
-# ifdef USE_AS_STRNCAT
-# define LEN STR2+8
-# endif
-
-# define USE_AS_STRCAT
-
-.text
-ENTRY (STRCAT)
- PUSH (%edi)
- mov STR1(%esp), %edi
- mov %edi, %edx
-
-# define RETURN jmp L(StartStrcpyPart)
-# include "strlen-sse2.S"
-
-L(StartStrcpyPart):
- mov STR2(%esp), %ecx
- lea (%edi, %eax), %edx
-# ifdef USE_AS_STRNCAT
- PUSH (%ebx)
- mov LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(StrncatExit0)
- cmp $8, %ebx
- jbe L(StrncatExit8Bytes)
-# endif
- cmpb $0, (%ecx)
- jz L(Exit1)
- cmpb $0, 1(%ecx)
- jz L(Exit2)
- cmpb $0, 2(%ecx)
- jz L(Exit3)
- cmpb $0, 3(%ecx)
- jz L(Exit4)
- cmpb $0, 4(%ecx)
- jz L(Exit5)
- cmpb $0, 5(%ecx)
- jz L(Exit6)
- cmpb $0, 6(%ecx)
- jz L(Exit7)
- cmpb $0, 7(%ecx)
- jz L(Exit8)
- cmpb $0, 8(%ecx)
- jz L(Exit9)
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- jb L(StrncatExit15Bytes)
-# endif
- cmpb $0, 9(%ecx)
- jz L(Exit10)
- cmpb $0, 10(%ecx)
- jz L(Exit11)
- cmpb $0, 11(%ecx)
- jz L(Exit12)
- cmpb $0, 12(%ecx)
- jz L(Exit13)
- cmpb $0, 13(%ecx)
- jz L(Exit14)
- cmpb $0, 14(%ecx)
- jz L(Exit15)
- cmpb $0, 15(%ecx)
- jz L(Exit16)
-# ifdef USE_AS_STRNCAT
- cmp $16, %ebx
- je L(StrncatExit16)
-
-# define RETURN1 \
- POP (%ebx); \
- POP (%edi); \
- ret; \
- CFI_PUSH (%ebx); \
- CFI_PUSH (%edi)
-# define USE_AS_STRNCPY
-# else
-# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
-# endif
-# include "strcpy-ssse3.S"
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %esi, %edx
- add %esi, %ecx
-
- POP (%esi)
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(ExitHigh):
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
- movlpd (%ecx), %xmm0
- movlpd 8(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit1):
- movb %bh, 1(%edx)
-L(Exit1):
- movb (%ecx), %al
- movb %al, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit2):
- movb %bh, 2(%edx)
-L(Exit2):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit3):
- movb %bh, 3(%edx)
-L(Exit3):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit4):
- movb %bh, 4(%edx)
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit5):
- movb %bh, 5(%edx)
-L(Exit5):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit6):
- movb %bh, 6(%edx)
-L(Exit6):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit7):
- movb %bh, 7(%edx)
-L(Exit7):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit8):
- movb %bh, 8(%edx)
-L(Exit8):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit9):
- movb %bh, 9(%edx)
-L(Exit9):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movb 8(%ecx), %al
- movb %al, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit10):
- movb %bh, 10(%edx)
-L(Exit10):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movw 8(%ecx), %ax
- movw %ax, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit11):
- movb %bh, 11(%edx)
-L(Exit11):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 7(%ecx), %eax
- movl %eax, 7(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit12):
- movb %bh, 12(%edx)
-L(Exit12):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit13):
- movb %bh, 13(%edx)
-L(Exit13):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit14):
- movb %bh, 14(%edx)
-L(Exit14):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit15):
- movb %bh, 15(%edx)
-L(Exit15):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit16):
- movb %bh, 16(%edx)
-L(Exit16):
- movlpd (%ecx), %xmm0
- movlpd 8(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 8(%edx)
- movl %edi, %eax
- RETURN1
-
-# ifdef USE_AS_STRNCPY
-
- CFI_PUSH(%esi)
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %ebx
- add %esi, %ecx
- lea (%esi, %edx), %esi
- lea -9(%ebx), %edx
- and $1<<7, %dh
- or %al, %dh
- test %dh, %dh
- lea (%esi), %edx
- POP (%esi)
- jz L(ExitHighCase2)
-
- test $0x01, %al
- jnz L(Exit1)
- cmp $1, %ebx
- je L(StrncatExit1)
- test $0x02, %al
- jnz L(Exit2)
- cmp $2, %ebx
- je L(StrncatExit2)
- test $0x04, %al
- jnz L(Exit3)
- cmp $3, %ebx
- je L(StrncatExit3)
- test $0x08, %al
- jnz L(Exit4)
- cmp $4, %ebx
- je L(StrncatExit4)
- test $0x10, %al
- jnz L(Exit5)
- cmp $5, %ebx
- je L(StrncatExit5)
- test $0x20, %al
- jnz L(Exit6)
- cmp $6, %ebx
- je L(StrncatExit6)
- test $0x40, %al
- jnz L(Exit7)
- cmp $7, %ebx
- je L(StrncatExit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- lea 7(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
- xor %cl, %cl
- movb %cl, (%eax)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(ExitHighCase2):
- test $0x01, %ah
- jnz L(Exit9)
- cmp $9, %ebx
- je L(StrncatExit9)
- test $0x02, %ah
- jnz L(Exit10)
- cmp $10, %ebx
- je L(StrncatExit10)
- test $0x04, %ah
- jnz L(Exit11)
- cmp $11, %ebx
- je L(StrncatExit11)
- test $0x8, %ah
- jnz L(Exit12)
- cmp $12, %ebx
- je L(StrncatExit12)
- test $0x10, %ah
- jnz L(Exit13)
- cmp $13, %ebx
- je L(StrncatExit13)
- test $0x20, %ah
- jnz L(Exit14)
- cmp $14, %ebx
- je L(StrncatExit14)
- test $0x40, %ah
- jnz L(Exit15)
- cmp $15, %ebx
- je L(StrncatExit15)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm1
- movlpd %xmm1, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- CFI_PUSH(%esi)
-
-L(CopyFrom1To16BytesCase2OrCase3):
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
-
- .p2align 4
-L(CopyFrom1To16BytesCase3):
- add $16, %ebx
- add %esi, %edx
- add %esi, %ecx
-
- POP (%esi)
-
- cmp $8, %ebx
- ja L(ExitHighCase3)
- cmp $1, %ebx
- je L(StrncatExit1)
- cmp $2, %ebx
- je L(StrncatExit2)
- cmp $3, %ebx
- je L(StrncatExit3)
- cmp $4, %ebx
- je L(StrncatExit4)
- cmp $5, %ebx
- je L(StrncatExit5)
- cmp $6, %ebx
- je L(StrncatExit6)
- cmp $7, %ebx
- je L(StrncatExit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movb %bh, 8(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(ExitHighCase3):
- cmp $9, %ebx
- je L(StrncatExit9)
- cmp $10, %ebx
- je L(StrncatExit10)
- cmp $11, %ebx
- je L(StrncatExit11)
- cmp $12, %ebx
- je L(StrncatExit12)
- cmp $13, %ebx
- je L(StrncatExit13)
- cmp $14, %ebx
- je L(StrncatExit14)
- cmp $15, %ebx
- je L(StrncatExit15)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm1
- movlpd %xmm1, 8(%edx)
- movb %bh, 16(%edx)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit0):
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit15Bytes):
- cmp $9, %ebx
- je L(StrncatExit9)
- cmpb $0, 9(%ecx)
- jz L(Exit10)
- cmp $10, %ebx
- je L(StrncatExit10)
- cmpb $0, 10(%ecx)
- jz L(Exit11)
- cmp $11, %ebx
- je L(StrncatExit11)
- cmpb $0, 11(%ecx)
- jz L(Exit12)
- cmp $12, %ebx
- je L(StrncatExit12)
- cmpb $0, 12(%ecx)
- jz L(Exit13)
- cmp $13, %ebx
- je L(StrncatExit13)
- cmpb $0, 13(%ecx)
- jz L(Exit14)
- cmp $14, %ebx
- je L(StrncatExit14)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
- lea 14(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
- movb %bh, (%eax)
- movl %edi, %eax
- RETURN1
-
- .p2align 4
-L(StrncatExit8Bytes):
- cmpb $0, (%ecx)
- jz L(Exit1)
- cmp $1, %ebx
- je L(StrncatExit1)
- cmpb $0, 1(%ecx)
- jz L(Exit2)
- cmp $2, %ebx
- je L(StrncatExit2)
- cmpb $0, 2(%ecx)
- jz L(Exit3)
- cmp $3, %ebx
- je L(StrncatExit3)
- cmpb $0, 3(%ecx)
- jz L(Exit4)
- cmp $4, %ebx
- je L(StrncatExit4)
- cmpb $0, 4(%ecx)
- jz L(Exit5)
- cmp $5, %ebx
- je L(StrncatExit5)
- cmpb $0, 5(%ecx)
- jz L(Exit6)
- cmp $6, %ebx
- je L(StrncatExit6)
- cmpb $0, 6(%ecx)
- jz L(Exit7)
- cmp $7, %ebx
- je L(StrncatExit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- lea 7(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
- movb %bh, (%eax)
- movl %edi, %eax
- RETURN1
-
-# endif
-END (STRCAT)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S
deleted file mode 100644
index 8412cb6f23..0000000000
--- a/sysdeps/i386/i686/multiarch/strcat.S
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Multiple versions of strcat
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-# define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3 __strncat_ssse3
-# define STRCAT_SSE2 __strncat_sse2
-# define STRCAT_IA32 __strncat_ia32
-# define __GI_STRCAT __GI_strncat
-#else
-# define STRCAT_SSSE3 __strcat_ssse3
-# define STRCAT_SSE2 __strcat_sse2
-# define STRCAT_IA32 __strcat_ia32
-# define __GI_STRCAT __GI_strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc. Don't
- define multiple versions for strncat in static library since we
- need strncat before the initialization happened. */
-#if IS_IN (libc)
-
- .text
-ENTRY(STRCAT)
- .type STRCAT, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (STRCAT_IA32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (STRCAT_SSE2)
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- LOAD_FUNC_GOT_EAX (STRCAT_SSSE3)
-2: ret
-END(STRCAT)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCAT_IA32, @function; \
- .align 16; \
- .globl STRCAT_IA32; \
- .hidden STRCAT_IA32; \
- STRCAT_IA32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
- The speedup we get from using SSSE3 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
-
-# endif
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../../strcat.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
deleted file mode 100644
index 95fd7c084e..0000000000
--- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
+++ /dev/null
@@ -1,158 +0,0 @@
-/* strchr with SSE2 with bsf
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 8
-# define ENTRANCE PUSH(%edi)
-# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
-
-# define STR1 PARMS
-# define STR2 STR1+4
-
- .text
-ENTRY (__strchr_sse2_bsf)
-
- ENTRANCE
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
-
- pxor %xmm2, %xmm2
- mov %ecx, %edi
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
- /* ECX has OFFSET. */
- and $15, %ecx
- pshufd $0, %xmm1, %xmm1
- je L(loop)
-
-/* Handle unaligned string. */
- and $-16, %edi
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %edx
- /* Check if there is a match. */
- pmovmskb %xmm0, %eax
- /* Remove the leading bytes. */
- sarl %cl, %edx
- sarl %cl, %eax
- test %eax, %eax
- je L(unaligned_no_match)
- /* Check which byte is a match. */
- bsf %eax, %eax
- /* Is there a NULL? */
- test %edx, %edx
- je L(unaligned_match)
- bsf %edx, %edx
- cmpl %edx, %eax
- /* Return NULL if NULL comes first. */
- ja L(return_null)
-L(unaligned_match):
- add %edi, %eax
- add %ecx, %eax
- RETURN
-
- .p2align 4
-L(unaligned_no_match):
- test %edx, %edx
- jne L(return_null)
- pxor %xmm2, %xmm2
-
- add $16, %edi
-
- .p2align 4
-/* Loop start on aligned string. */
-L(loop):
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
- jmp L(loop)
-
-L(matches):
- pmovmskb %xmm2, %edx
- test %eax, %eax
- jz L(return_null)
- bsf %eax, %eax
- /* There is a match. First find where NULL is. */
- test %edx, %edx
- je L(match)
- bsf %edx, %ecx
- /* Check if NULL comes first. */
- cmpl %ecx, %eax
- ja L(return_null)
-L(match):
- sub $16, %edi
- add %edi, %eax
- RETURN
-
-/* Return NULL. */
- .p2align 4
-L(return_null):
- xor %eax, %eax
- RETURN
-
-END (__strchr_sse2_bsf)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2.S b/sysdeps/i386/i686/multiarch/strchr-sse2.S
deleted file mode 100644
index 1f9e875b04..0000000000
--- a/sysdeps/i386/i686/multiarch/strchr-sse2.S
+++ /dev/null
@@ -1,348 +0,0 @@
-/* strchr SSE2 without bsf
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) /* Unwind (CFI) annotations matching a 4-byte push of REG. */ \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) /* Unwind (CFI) annotations matching a 4-byte pop of REG. */ \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG) /* Push REG and annotate the push. */
-# define POP(REG) popl REG; CFI_POP (REG) /* Pop REG and annotate the pop. */
-
-# define PARMS 8 /* %esp offset of the first argument once ENTRANCE has pushed %edi (presumably 4 bytes return address + 4 bytes saved %edi). */
-# define ENTRANCE PUSH(%edi) /* Prologue: %edi is the only register saved. */
-# define RETURN POP(%edi); ret; CFI_PUSH(%edi); /* Epilogue. The CFI_PUSH after ret adds only annotations, re-declaring %edi as pushed for the code assembled after this return. */
-
-# define STR1 PARMS /* Stack offset of the first argument (loaded below as the string pointer). */
-# define STR2 STR1+4 /* Stack offset of the second argument (loaded below as the character). */
-
- atom_text_section
-ENTRY (__strchr_sse2) /* strchr variant that scans 16 bytes at a time with SSE2 and decodes the result masks with bit tests instead of bsf. */
-
- ENTRANCE
- mov STR1(%esp), %ecx /* ECX = string pointer. */
- movd STR2(%esp), %xmm1 /* Low dword of XMM1 = character argument. */
-
- pxor %xmm2, %xmm2 /* XMM2 = 0; compared against the data to find NUL bytes. */
- mov %ecx, %edi /* EDI = string pointer. */
- punpcklbw %xmm1, %xmm1 /* Replicate the character byte into the low word... */
- punpcklbw %xmm1, %xmm1 /* ...and then into the low dword. */
- /* ECX has OFFSET: the distance of the string start from the preceding 16-byte boundary. */
- and $15, %ecx /* Also sets ZF when the string is already 16-byte aligned. */
- pshufd $0, %xmm1, %xmm1 /* Broadcast the low dword: every byte of XMM1 is now the character. */
- je L(loop) /* Tests ZF from the `and' above (the pshufd in between leaves flags alone). */
-
-/* Handle unaligned string: load the aligned block containing the start and ignore the bytes before it. */
- and $-16, %edi /* Round EDI down to a 16-byte boundary so movdqa is legal. */
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2 /* XMM2 byte = 0xff where the block byte is NUL. */
- pcmpeqb %xmm1, %xmm0 /* XMM0 byte = 0xff where the block byte equals the character. */
- /* Find where NULL is: EDX bit i is set when byte i of the block is NUL. */
- pmovmskb %xmm2, %edx
- /* Check if there is a match: EAX bit i is set when byte i equals the character. */
- pmovmskb %xmm0, %eax
- /* Remove the leading bytes, i.e. the mask bits for bytes before the string start. */
- sarl %cl, %edx
- sarl %cl, %eax
- test %eax, %eax
- jz L(unaligned_no_match)
- /* Check which byte is a match. */
- /* Is there a NULL? */
- add %ecx, %edi /* EDI = original string pointer, so mask bit i now refers to EDI+i. */
- test %edx, %edx
- jz L(match_case1) /* Match and no NUL in this block. */
- jmp L(match_case2) /* Match and NUL both present: must find which comes first. */
-
- .p2align 4
-L(unaligned_no_match): /* First (partial) block held no match; EDI is still the aligned block address. */
- test %edx, %edx
- jne L(return_null) /* NUL seen without a preceding match: character not present. */
-
- pxor %xmm2, %xmm2 /* Re-zero XMM2: it may still flag NUL bytes located before the string start. */
- add $16, %edi /* Advance to the next aligned block. */
-
- .p2align 4
-/* Loop start on aligned string.  The body is unrolled four times; XMM2 is all-zero at the top of every copy because any nonzero NUL mask leaves the loop. */
-L(loop):
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2 /* NUL positions. */
- pcmpeqb %xmm1, %xmm0 /* Character positions. */
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches) /* EDI still addresses the block that produced the masks. */
- test %edx, %edx
- jnz L(return_null)
- add $16, %edi
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
- test %edx, %edx
- jnz L(return_null)
- add $16, %edi
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
- test %edx, %edx
- jnz L(return_null)
- add $16, %edi
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches)
- test %edx, %edx
- jnz L(return_null)
- add $16, %edi
- jmp L(loop)
-
-L(matches): /* EAX (match mask) is nonzero here; EDX is the NUL mask of the same block. */
- /* There is a match. First find where NULL is. */
- test %edx, %edx
- jz L(match_case1) /* No NUL in the block: the lowest set bit of EAX is the answer. */
-
- .p2align 4
-L(match_case2): /* Both masks are nonzero: walk the bits from low to high, testing the match bit before the NUL bit at each position. */
- test %al, %al
- jz L(match_higth_case2) /* No match in bytes 0-7. */
-
- mov %al, %cl
- and $15, %cl
- jnz L(match_case2_4) /* A match lies in bytes 0-3. */
-
- mov %dl, %ch
- and $15, %ch
- jnz L(return_null) /* NUL in bytes 0-3 precedes the match in bytes 4-7. */
-
- test $0x10, %al
- jnz L(Exit5)
- test $0x10, %dl
- jnz L(return_null)
- test $0x20, %al
- jnz L(Exit6)
- test $0x20, %dl
- jnz L(return_null)
- test $0x40, %al
- jnz L(Exit7)
- test $0x40, %dl
- jnz L(return_null)
- lea 7(%edi), %eax /* Only bit 7 of AL can be the set match bit: byte 7. */
- RETURN
-
- .p2align 4
-L(match_case2_4): /* Match somewhere in bytes 0-3. */
- test $0x01, %al
- jnz L(Exit1)
- test $0x01, %dl
- jnz L(return_null)
- test $0x02, %al
- jnz L(Exit2)
- test $0x02, %dl
- jnz L(return_null)
- test $0x04, %al
- jnz L(Exit3)
- test $0x04, %dl
- jnz L(return_null)
- lea 3(%edi), %eax /* Bits 0-2 ruled out, so the match is byte 3. */
- RETURN
-
- .p2align 4
-L(match_higth_case2): /* Match is in bytes 8-15 (AH); same walk as above on AH/DH. */
- test %dl, %dl
- jnz L(return_null) /* NUL in bytes 0-7 precedes the match. */
-
- mov %ah, %cl
- and $15, %cl
- jnz L(match_case2_12) /* A match lies in bytes 8-11. */
-
- mov %dh, %ch
- and $15, %ch
- jnz L(return_null) /* NUL in bytes 8-11 precedes the match in bytes 12-15. */
-
- test $0x10, %ah
- jnz L(Exit13)
- test $0x10, %dh
- jnz L(return_null)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x20, %dh
- jnz L(return_null)
- test $0x40, %ah
- jnz L(Exit15)
- test $0x40, %dh
- jnz L(return_null)
- lea 15(%edi), %eax /* Remaining possibility: byte 15. */
- RETURN
-
- .p2align 4
-L(match_case2_12): /* Match somewhere in bytes 8-11. */
- test $0x01, %ah
- jnz L(Exit9)
- test $0x01, %dh
- jnz L(return_null)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x02, %dh
- jnz L(return_null)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x04, %dh
- jnz L(return_null)
- lea 11(%edi), %eax /* Bits 0-2 of AH ruled out, so the match is byte 11. */
- RETURN
-
- .p2align 4
-L(match_case1): /* Match present and no NUL in the block: return the address of the lowest set bit of EAX, found by testing bits in order. */
- test %al, %al
- jz L(match_higth_case1) /* Nothing in bytes 0-7; look at AH. */
-
- test $0x01, %al
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
- lea 7(%edi), %eax /* AL is nonzero and bits 0-6 are clear: byte 7. */
- RETURN
-
- .p2align 4
-L(match_higth_case1): /* Lowest set bit is in AH (bytes 8-15). */
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
- lea 15(%edi), %eax /* Bits 0-6 of AH are clear: byte 15. */
- RETURN
-
- .p2align 4
-L(Exit1): /* L(ExitN) returns EDI + (N - 1), the address of the N-th byte of the block.  Bytes 8 and 16 have no label; the callers handle them inline with lea 7/15. */
- lea (%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit2):
- lea 1(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit3):
- lea 2(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit4):
- lea 3(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit5):
- lea 4(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit6):
- lea 5(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit7):
- lea 6(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit9):
- lea 8(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit10):
- lea 9(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit11):
- lea 10(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit12):
- lea 11(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit13):
- lea 12(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit14):
- lea 13(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit15):
- lea 14(%edi), %eax
- RETURN
-
-/* Return NULL: the terminating NUL was reached before any occurrence of the character. */
- .p2align 4
-L(return_null):
- xor %eax, %eax
- RETURN
-
-END (__strchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strchr.S b/sysdeps/i386/i686/multiarch/strchr.S
deleted file mode 100644
index 5b97b1c767..0000000000
--- a/sysdeps/i386/i686/multiarch/strchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(strchr) /* IFUNC resolver: picks one of three strchr implementations.  The helper macros come from the included headers and are not shown here; the notes below assume they leave the chosen address in %eax and ZF set when the tested feature is absent -- TODO confirm. */
- .type strchr, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__strchr_ia32) /* Default choice. */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* Keep __strchr_ia32. */
- LOAD_FUNC_GOT_EAX (__strchr_sse2_bsf) /* SSE2 available: prefer the bsf-based variant. */
- HAS_ARCH_FEATURE (Slow_BSF)
- jz 2f /* Keep __strchr_sse2_bsf. */
- LOAD_FUNC_GOT_EAX (__strchr_sse2) /* Slow_BSF: use the variant that avoids bsf. */
-2: ret
-END(strchr)
-
-# undef ENTRY
-# define ENTRY(name) /* Ignores `name': the function in the file included below is emitted as __strchr_ia32. */ \
- .type __strchr_ia32, @function; \
- .globl __strchr_ia32; \
- .p2align 4; \
- __strchr_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) /* Likewise closes __strchr_ia32 whatever `name' is. */ \
- cfi_endproc; .size __strchr_ia32, .-__strchr_ia32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) /* Bind the internal alias __GI_strchr directly to __strchr_ia32. */ \
- .globl __GI_strchr; __GI_strchr = __strchr_ia32
-#endif /* IS_IN (libc) */
-
-#include "../../i586/strchr.S" /* Included unconditionally; the redefinitions above apply only when IS_IN (libc). */
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
deleted file mode 100644
index cd26058671..0000000000
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ /dev/null
@@ -1,804 +0,0 @@
-/* strcmp with SSE4.2
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG) /* Unwind (CFI) annotations matching a 4-byte push of REG. */ \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) /* Unwind (CFI) annotations matching a 4-byte pop of REG. */ \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_STRNCMP /* Per-variant configuration: function name, %esp offsets of the arguments after the entry pushes, and the early-exit RETURN sequence. */
-# ifndef STRCMP
-# define STRCMP __strncmp_sse4_2
-# endif
-# define STR1 8 /* One register (REM) is pushed before the arguments are read. */
-# define STR2 STR1+4
-# define CNT STR2+4
-# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
-# define REM %ebp /* Remaining byte count. */
-#elif defined USE_AS_STRCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-# define STRCMP __strcasecmp_l_sse4_2
-# endif
-# ifdef PIC
-# define STR1 12 /* %ebx and %edi are pushed before the arguments are read. */
-# else
-# define STR1 8 /* Only %edi is pushed before the arguments are read. */
-# endif
-# define STR2 STR1+4
-# define LOCALE 12 /* Loaded before the adjustment, i.e. before any register is pushed. */
-# ifdef PIC
-# define RETURN POP (%edi); POP (%ebx); ret; \
- .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (%edi)
-# else
-# define RETURN POP (%edi); ret; .p2align 4; CFI_PUSH (%edi)
-# endif
-# define NONASCII __strcasecmp_nonascii /* Jump target used when the locale test in STRCMP is nonzero. */
-#elif defined USE_AS_STRNCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-# define STRCMP __strncasecmp_l_sse4_2
-# endif
-# ifdef PIC
-# define STR1 16 /* %ebx, REM and %edi are pushed before the arguments are read. */
-# else
-# define STR1 12 /* REM and %edi are pushed before the arguments are read. */
-# endif
-# define STR2 STR1+4
-# define CNT STR2+4
-# define LOCALE 16 /* Loaded before the adjustment, i.e. before any register is pushed. */
-# ifdef PIC
-# define RETURN POP (%edi); POP (REM); POP (%ebx); ret; \
- .p2align 4; \
- CFI_PUSH (%ebx); CFI_PUSH (REM); CFI_PUSH (%edi)
-# else
-# define RETURN POP (%edi); POP (REM); ret; \
- .p2align 4; CFI_PUSH (REM); CFI_PUSH (%edi)
-# endif
-# define REM %ebp /* Remaining byte count. */
-# define NONASCII __strncasecmp_nonascii /* Jump target used when the locale test in STRCMP is nonzero. */
-#else
-# ifndef STRCMP
-# define STRCMP __strcmp_sse4_2
-# endif
-# define STR1 4 /* Nothing is pushed before the arguments are read. */
-# define STR2 STR1+4
-# define RETURN ret; .p2align 4
-#endif
-
- .section .text.sse4.2,"ax",@progbits
-
-#ifdef USE_AS_STRCASECMP_L
-ENTRY (__strcasecmp_sse4_2) /* Entry without a locale argument: fetches a locale pointer from the thread-local __libc_tsd_LOCALE, then either continues at L(ascii) inside STRCMP or jumps to __strcasecmp_nonascii. */
-# ifdef PIC
- PUSH (%ebx) /* Stays pushed on the L(ascii) path, matching the PUSH (%ebx) that STRCMP itself performs before L(ascii). */
- LOAD_PIC_REG(bx)
- movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-# ifdef NO_TLS_DIRECT_SEG_REFS
- addl %gs:0, %eax
- movl (%eax), %eax
-# else
- movl %gs:(%eax), %eax
-# endif
-# else
-# ifdef NO_TLS_DIRECT_SEG_REFS
- movl %gs:0, %eax
- movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
-# else
- movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
-# endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
- movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax /* EAX = the locale's LC_CTYPE entry. */
-# else
- movl (%eax), %eax
-# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) /* Bit 0 of the _NL_CTYPE_NONASCII_CASE value. */
-# ifdef PIC
- je L(ascii) /* Bit clear: use the SSE4.2 code below. */
- POP (%ebx) /* Undo the push before leaving this file's code. */
- jmp __strcasecmp_nonascii
-# else
- jne __strcasecmp_nonascii
- jmp L(ascii)
-# endif
-END (__strcasecmp_sse4_2)
-#endif
-
-#ifdef USE_AS_STRNCASECMP_L
-ENTRY (__strncasecmp_sse4_2) /* Entry without a locale argument: fetches a locale pointer from the thread-local __libc_tsd_LOCALE, then either continues at L(ascii) inside STRCMP or jumps to __strncasecmp_nonascii. */
-# ifdef PIC
- PUSH (%ebx) /* Stays pushed on the L(ascii) path, matching the PUSH (%ebx) that STRCMP itself performs before L(ascii). */
- LOAD_PIC_REG(bx)
- movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-# ifdef NO_TLS_DIRECT_SEG_REFS
- addl %gs:0, %eax
- movl (%eax), %eax
-# else
- movl %gs:(%eax), %eax
-# endif
-# else
-# ifdef NO_TLS_DIRECT_SEG_REFS
- movl %gs:0, %eax
- movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
-# else
- movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
-# endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
- movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax /* EAX = the locale's LC_CTYPE entry. */
-# else
- movl (%eax), %eax
-# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax) /* Bit 0 of the _NL_CTYPE_NONASCII_CASE value. */
-# ifdef PIC
- je L(ascii) /* Bit clear: use the SSE4.2 code below. */
- POP (%ebx) /* Undo the push before leaving this file's code. */
- jmp __strncasecmp_nonascii
-# else
- jne __strncasecmp_nonascii
- jmp L(ascii)
-# endif
-END (__strncasecmp_sse4_2)
-#endif
-
- ENTRY (STRCMP) /* Shared body for the strcmp / strncmp / strcasecmp_l / strncasecmp_l variants selected by the USE_AS_* macros. */
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movl LOCALE(%esp), %eax /* Locale argument, read before anything is pushed. */
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
- movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
- movl (%eax), %eax
-# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
- jne NONASCII /* Bit 0 set: hand over to the NONASCII routine. */
-
-# ifdef PIC
- PUSH (%ebx)
- LOAD_PIC_REG(bx)
-# endif
-L(ascii): /* Also entered from the locale-less entry points above, which arrive with the same stack layout. */
- .section .rodata.cst16,"aM",@progbits,16
- .align 16
-.Lbelowupper: /* 16 bytes of 0x40, i.e. 'A' - 1. */
- .quad 0x4040404040404040
- .quad 0x4040404040404040
-.Ltopupper: /* 16 bytes of 0x5b, i.e. 'Z' + 1. */
- .quad 0x5b5b5b5b5b5b5b5b
- .quad 0x5b5b5b5b5b5b5b5b
-.Ltouppermask: /* 16 bytes of 0x20, the ASCII case bit that TOLOWER ORs in. */
- .quad 0x2020202020202020
- .quad 0x2020202020202020
- .previous
-
-# ifdef PIC
-# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
-# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
-# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
-# else
-# define UCLOW_reg .Lbelowupper
-# define UCHIGH_reg .Ltopupper
-# define LCQWORD_reg .Ltouppermask
-# endif
-#endif
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- PUSH (REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- PUSH (%edi)
-#endif
- mov STR1(%esp), %edx /* EDX = first string. */
- mov STR2(%esp), %eax /* EAX = second string. */
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- movl CNT(%esp), REM
- test REM, REM
- je L(eq) /* Zero-length comparison: equal. */
-#endif
- mov %dx, %cx
- and $0xfff, %cx /* Offset of the first string within a 4096-byte page. */
- cmp $0xff0, %cx
- ja L(first4bytes) /* A 16-byte load would run past the page end: compare byte by byte instead. */
- movdqu (%edx), %xmm2 /* First 16 bytes of the first string. */
- mov %eax, %ecx
- and $0xfff, %ecx
- cmp $0xff0, %ecx
- ja L(first4bytes) /* Same page-end check for the second string. */
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define TOLOWER(reg1, reg2) /* Sets bit 0x20 in every byte of reg1 and reg2 lying strictly between 0x40 and 0x5b (signed compare), i.e. lower-cases 'A'..'Z'.  Clobbers XMM3-XMM6. */ \
- movdqa reg1, %xmm3; \
- movdqa UCHIGH_reg, %xmm4; \
- movdqa reg2, %xmm5; \
- movdqa UCHIGH_reg, %xmm6; \
- pcmpgtb UCLOW_reg, %xmm3; \
- pcmpgtb reg1, %xmm4; \
- pcmpgtb UCLOW_reg, %xmm5; \
- pcmpgtb reg2, %xmm6; \
- pand %xmm4, %xmm3; \
- pand %xmm6, %xmm5; \
- pand LCQWORD_reg, %xmm3; \
- pand LCQWORD_reg, %xmm5; \
- por %xmm3, reg1; \
- por %xmm5, reg2
-
- movdqu (%eax), %xmm1
- TOLOWER (%xmm2, %xmm1)
- movd %xmm2, %ecx /* ECX = first 4 lower-cased bytes of the first string. */
- movd %xmm1, %edi /* EDI = first 4 lower-cased bytes of the second string. */
- movdqa %xmm2, %xmm3 /* Keep lower-cased copies for L(less16bytes). */
- movdqa %xmm1, %xmm4
- cmpl %edi, %ecx
-#else
-# define TOLOWER(reg1, reg) /* No case folding in the case-sensitive variants. */
-
- movd %xmm2, %ecx /* ECX = first 4 bytes of the first string. */
- cmp (%eax), %ecx
-#endif
- jne L(less4bytes) /* Difference within the first 4 bytes. */
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
- movdqu (%eax), %xmm1
-#endif
- pxor %xmm2, %xmm1 /* XMM1 = XOR of the two blocks: nonzero where bytes differ. */
- pxor %xmm0, %xmm0
- ptest %xmm1, %xmm0 /* With XMM0 = 0, CF is set only when XMM1 is entirely zero. */
- jnc L(less16bytes) /* Some byte differs. */
- pcmpeqb %xmm0, %xmm2 /* 0xff where the first string has a NUL. */
- ptest %xmm2, %xmm0
- jnc L(less16bytes) /* A NUL lies within these 16 bytes. */
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- sub $16, REM
- jbe L(eq) /* Count exhausted with everything equal. */
-#endif
- add $16, %edx /* 16 equal, NUL-free bytes consumed. */
- add $16, %eax
-L(first4bytes): /* Despite the name, compares the next 8 bytes one at a time (offsets 0-7 from EAX/EDX), then advances both pointers by 8. */
- movzbl (%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl (%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx /* Map both bytes through the _nl_C_LC_CTYPE_tolower table (4-byte entries; the 128*4 bias presumably skips entries for negative indices -- TODO confirm). */
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, (%edx)
-#endif
- jne L(neq) /* L(neq) derives the sign from the flags of this compare. */
- cmpl $0, %ecx
- je L(eq) /* Equal bytes and both NUL: strings are equal. */
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $1, REM
- je L(eq) /* Count reached after N equal bytes (same pattern below for 2..7). */
-#endif
-
- movzbl 1(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 1(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 1(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $2, REM
- je L(eq)
-#endif
- movzbl 2(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 2(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 2(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $3, REM
- je L(eq)
-#endif
- movzbl 3(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 3(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 3(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $4, REM
- je L(eq)
-#endif
- movzbl 4(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 4(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 4(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $5, REM
- je L(eq)
-#endif
- movzbl 5(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 5(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 5(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $6, REM
- je L(eq)
-#endif
- movzbl 6(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 6(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 6(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $7, REM
- je L(eq)
-#endif
- movzbl 7(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 7(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 7(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- sub $8, REM /* Account for the 8 bytes just compared. */
- je L(eq)
-#endif
- add $8, %eax
- add $8, %edx
-
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
- PUSH (%edi) /* The case-insensitive variants already pushed %edi at entry. */
-#endif
- PUSH (%esi)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cfi_remember_state
-#endif
- mov %edx, %edi /* EDI = first string. */
- mov %eax, %esi /* ESI = second string. */
- xorl %eax, %eax /* Result is 0 unless a difference is found. */
-L(check_offset): /* Re-bias the pointers so that EDX counts up towards the nearer page end: 16-byte loads are used while EDX <= 0. */
- movl %edi, %edx
- movl %esi, %ecx
- andl $0xfff, %edx
- andl $0xfff, %ecx
- cmpl %edx, %ecx
- cmovl %edx, %ecx /* ECX = larger of the two page offsets. */
- lea -0xff0(%ecx), %edx /* EDX = that offset minus 0xff0. */
- sub %edx, %edi /* (EDI,EDX) and (ESI,EDX) still address the current position. */
- sub %edx, %esi
- testl %edx, %edx
- jg L(crosspage) /* Fewer than 16 bytes left before a page end. */
-L(loop):
- movdqu (%esi,%edx), %xmm2
- movdqu (%edi,%edx), %xmm1
- TOLOWER (%xmm2, %xmm1)
- pcmpistri $0x1a, %xmm2, %xmm1 /* imm8 0x1a: signed bytes, equal-each, negative polarity, least-significant index -- ECX = index of the first differing byte. */
- jbe L(end) /* CF (difference found) or ZF (NUL in the XMM2 block) is set. */
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- sub $16, REM
- jbe L(more16byteseq) /* Count exhausted with everything equal. */
-#endif
-
- add $16, %edx
- jle L(loop)
-L(crosspage): /* Compare single bytes until EDX passes 15, i.e. across the page boundary, then recompute the offsets. */
- movzbl (%edi,%edx), %eax
- movzbl (%esi,%edx), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-# endif
-#endif
- subl %ecx, %eax /* EAX = byte difference, which is also the return value. */
- jne L(ret)
- testl %ecx, %ecx
- je L(ret) /* Equal bytes and NUL: return 0 (EAX is zero here). */
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- sub $1, REM
- jbe L(more16byteseq)
-#endif
- inc %edx
- cmp $15, %edx
- jle L(crosspage)
- add %edx, %edi /* Fold the index back into the pointers before re-biasing. */
- add %edx, %esi
- jmp L(check_offset)
-
- .p2align 4
-L(end): /* Reached from L(loop) with the pcmpistri flags live and ECX = index of the first differing byte. */
- jnc L(ret) /* CF clear: no difference, only a NUL was seen; EAX is still 0. */
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- sub %ecx, REM
- jbe L(more16byteseq) /* The difference lies at or beyond the count limit: equal. */
-#endif
- lea (%ecx,%edx), %ecx /* ECX = offset of the differing byte relative to EDI/ESI. */
- movzbl (%edi,%ecx), %eax
- movzbl (%esi,%ecx), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
-# endif
-#endif
- subl %ecx, %eax /* Return the difference of the two bytes. */
-L(ret): /* Epilogue for the main-loop paths: pops mirror the pushes done by this function, in reverse order. */
- POP (%esi)
- POP (%edi)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- POP (REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- POP (%ebx)
-# endif
-#endif
- ret
-
- .p2align 4
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cfi_restore_state
-L(more16byteseq): /* Count ran out inside the main loop: drop the main-loop pushes, then share L(eq). */
- POP (%esi)
-# ifdef USE_AS_STRNCMP
- POP (%edi) /* Only for strncmp; the strncasecmp RETURN sequence pops %edi itself. */
-# endif
-#endif
-L(eq):
- xorl %eax, %eax /* Return 0. */
- RETURN
-
-L(neq): /* Entered by jne right after a compare of (first-string byte) with (second-string byte); those flags are still live because mov leaves them alone. */
- mov $1, %eax
- ja L(neq_bigger) /* First string's byte is larger (unsigned): return 1. */
- neg %eax /* Otherwise return -1. */
-L(neq_bigger):
- RETURN
-
-L(less16bytes): /* A difference or NUL lies within the first 16 bytes, and bytes 0-3 compared equal.  ECX = bytes 0-3 of the first string (lower-cased in the case variants). */
- add $0xfefefeff, %ecx /* Word-at-a-time NUL probe: together with the xor/or/add below, branches to the bytewise code when one of the 4 bytes is zero. */
- jnc L(less4bytes)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movd %xmm3, %edi
- xor %edi, %ecx
-#else
- xor (%edx), %ecx
-#endif
- or $0xfefefeff, %ecx
- add $1, %ecx
- jnz L(less4bytes) /* NUL within bytes 0-3. */
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $4, REM
- jbe L(eq) /* At most 4 bytes requested and they are equal. */
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- psrldq $4, %xmm3 /* Shift the saved lower-cased copies so bytes 4-7 are in the low dword. */
- psrldq $4, %xmm4
- movd %xmm3, %ecx
- movd %xmm4, %edi
- cmp %edi, %ecx
- mov %ecx, %edi /* Keep a copy for the xor below; mov preserves the compare flags. */
-#else
- mov 4(%edx), %ecx
- cmp 4(%eax), %ecx
-#endif
- jne L(more4bytes) /* Bytes 4-7 differ. */
- add $0xfefefeff, %ecx /* Same NUL probe on bytes 4-7. */
- jnc L(more4bytes)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- xor %edi, %ecx
-#else
- xor 4(%edx), %ecx
-#endif
- or $0xfefefeff, %ecx
- add $1, %ecx
- jnz L(more4bytes) /* NUL within bytes 4-7. */
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- sub $8, REM
- jbe L(eq)
-#endif
-
- add $8, %edx /* Bytes 0-7 are equal and NUL-free; the bytewise code below handles bytes 8-15. */
- add $8, %eax
-L(less4bytes): /* Bytewise compare of offsets 0-3 from EAX/EDX; falls through into L(more4bytes) for offsets 4-7. */
-
- movzbl (%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl (%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, (%edx)
-#endif
- jne L(neq) /* L(neq) derives the sign from the flags of this compare. */
- cmpl $0, %ecx
- je L(eq) /* Equal bytes and both NUL. */
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $1, REM
- je L(eq) /* Count reached. */
-#endif
- movzbl 1(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 1(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 1(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $2, REM
- je L(eq)
-#endif
-
- movzbl 2(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 2(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 2(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $3, REM
- je L(eq)
-#endif
- movzbl 3(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 3(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 3(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-L(more4bytes): /* Bytewise compare of offsets 4-7 from EAX/EDX.  Every path into this code has already established that a difference or NUL exists no later than offset 7. */
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $4, REM
- je L(eq) /* Count reached. */
-#endif
- movzbl 4(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 4(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 4(%edx)
-#endif
- jne L(neq) /* L(neq) derives the sign from the flags of this compare. */
- cmpl $0, %ecx
- je L(eq)
-
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $5, REM
- je L(eq)
-#endif
- movzbl 5(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 5(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 5(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $6, REM
- je L(eq)
-#endif
- movzbl 6(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 6(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 6(%edx)
-#endif
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $7, REM
- je L(eq)
-#endif
- movzbl 7(%eax), %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movzbl 7(%edx), %edi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%edi,4), %edi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%edi,4), %edi
-# endif
- cmpl %ecx, %edi
-#else
- cmpb %cl, 7(%edx)
-#endif
- jne L(neq)
- jmp L(eq) /* No NUL test needed: if this last byte is equal it must be the terminator already detected by the caller. */
-
-END (STRCMP)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
deleted file mode 100644
index b25cc3e068..0000000000
--- a/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
+++ /dev/null
@@ -1,2810 +0,0 @@
-/* strcmp with SSSE3
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#ifdef USE_AS_STRNCMP
-# ifndef STRCMP
-# define STRCMP __strncmp_ssse3
-# endif
-# define STR1 8
-# define STR2 STR1+4
-# define CNT STR2+4
-# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
-# define UPDATE_STRNCMP_COUNTER \
- /* calculate left number to compare */ \
- mov $16, %esi; \
- sub %ecx, %esi; \
- cmp %esi, REM; \
- jbe L(more8byteseq); \
- sub %esi, REM
-# define FLAGS %ebx
-# define REM %ebp
-#elif defined USE_AS_STRCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-# define STRCMP __strcasecmp_l_ssse3
-# endif
-# ifdef PIC
-# define STR1 8
-# else
-# define STR1 4
-# endif
-# define STR2 STR1+4
-# define LOCALE 12 /* Loaded before the adjustment. */
-# ifdef PIC
-# define RETURN POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx)
-# else
-# define RETURN ret; .p2align 4
-# endif
-# define UPDATE_STRNCMP_COUNTER
-# define FLAGS (%esp)
-# define NONASCII __strcasecmp_nonascii
-#elif defined USE_AS_STRNCASECMP_L
-# include "locale-defines.h"
-# ifndef STRCMP
-# define STRCMP __strncasecmp_l_ssse3
-# endif
-# ifdef PIC
-# define STR1 12
-# else
-# define STR1 8
-# endif
-# define STR2 STR1+4
-# define CNT STR2+4
-# define LOCALE 16 /* Loaded before the adjustment. */
-# ifdef PIC
-# define RETURN POP (REM); POP (%ebx); ret; \
- .p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM)
-# else
-# define RETURN POP (REM); ret; .p2align 4; CFI_PUSH (REM)
-# endif
-# define UPDATE_STRNCMP_COUNTER \
- /* calculate left number to compare */ \
- mov $16, %esi; \
- sub %ecx, %esi; \
- cmp %esi, REM; \
- jbe L(more8byteseq); \
- sub %esi, REM
-# define FLAGS (%esp)
-# define REM %ebp
-# define NONASCII __strncasecmp_nonascii
-#else
-# ifndef STRCMP
-# define STRCMP __strcmp_ssse3
-# endif
-# define STR1 4
-# define STR2 STR1+4
-# define RETURN ret; .p2align 4
-# define UPDATE_STRNCMP_COUNTER
-# define FLAGS %ebx
-#endif
-
- .section .text.ssse3,"ax",@progbits
-
-#ifdef USE_AS_STRCASECMP_L
-ENTRY (__strcasecmp_ssse3)
-# ifdef PIC
- PUSH (%ebx)
- LOAD_PIC_REG(bx)
- movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-# ifdef NO_TLS_DIRECT_SEG_REFS
- addl %gs:0, %eax
- movl (%eax), %eax
-# else
- movl %gs:(%eax), %eax
-# endif
-# else
-# ifdef NO_TLS_DIRECT_SEG_REFS
- movl %gs:0, %eax
- movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
-# else
- movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
-# endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
- movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
- movl (%eax), %eax
-# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-# ifdef PIC
- je L(ascii)
- POP (%ebx)
- jmp __strcasecmp_nonascii
-# else
- jne __strcasecmp_nonascii
- jmp L(ascii)
-# endif
-END (__strcasecmp_ssse3)
-#endif
-
-#ifdef USE_AS_STRNCASECMP_L
-ENTRY (__strncasecmp_ssse3)
-# ifdef PIC
- PUSH (%ebx)
- LOAD_PIC_REG(bx)
- movl __libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
-# ifdef NO_TLS_DIRECT_SEG_REFS
- addl %gs:0, %eax
- movl (%eax), %eax
-# else
- movl %gs:(%eax), %eax
-# endif
-# else
-# ifdef NO_TLS_DIRECT_SEG_REFS
- movl %gs:0, %eax
- movl __libc_tsd_LOCALE@NTPOFF(%eax), %eax
-# else
- movl %gs:__libc_tsd_LOCALE@NTPOFF, %eax
-# endif
-# endif
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
- movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
- movl (%eax), %eax
-# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
-# ifdef PIC
- je L(ascii)
- POP (%ebx)
- jmp __strncasecmp_nonascii
-# else
- jne __strncasecmp_nonascii
- jmp L(ascii)
-# endif
-END (__strncasecmp_ssse3)
-#endif
-
-ENTRY (STRCMP)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movl LOCALE(%esp), %eax
-# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
- movl LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
-# else
- movl (%eax), %eax
-# endif
- testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
- jne NONASCII
-
-# ifdef PIC
- PUSH (%ebx)
- LOAD_PIC_REG(bx)
-# endif
-L(ascii):
- .section .rodata.cst16,"aM",@progbits,16
- .align 16
-.Lbelowupper:
- .quad 0x4040404040404040
- .quad 0x4040404040404040
-.Ltopupper:
- .quad 0x5b5b5b5b5b5b5b5b
- .quad 0x5b5b5b5b5b5b5b5b
-.Ltouppermask:
- .quad 0x2020202020202020
- .quad 0x2020202020202020
- .previous
-
-# ifdef PIC
-# define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
-# define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
-# define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
-# else
-# define UCLOW_reg .Lbelowupper
-# define UCHIGH_reg .Ltopupper
-# define LCQWORD_reg .Ltouppermask
-# endif
-#endif
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- PUSH (REM)
-#endif
-
- movl STR1(%esp), %edx
- movl STR2(%esp), %eax
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- movl CNT(%esp), REM
- cmp $16, REM
- jb L(less16bytes_sncmp)
-#elif !defined USE_AS_STRCASECMP_L
- movzbl (%eax), %ecx
- cmpb %cl, (%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- movzbl 1(%eax), %ecx
- cmpb %cl, 1(%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- movzbl 2(%eax), %ecx
- cmpb %cl, 2(%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- movzbl 3(%eax), %ecx
- cmpb %cl, 3(%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- movzbl 4(%eax), %ecx
- cmpb %cl, 4(%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- movzbl 5(%eax), %ecx
- cmpb %cl, 5(%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- movzbl 6(%eax), %ecx
- cmpb %cl, 6(%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- movzbl 7(%eax), %ecx
- cmpb %cl, 7(%edx)
- jne L(neq)
- cmpl $0, %ecx
- je L(eq)
-
- add $8, %edx
- add $8, %eax
-#endif
- movl %edx, %ecx
- and $0xfff, %ecx
- cmp $0xff0, %ecx
- ja L(crosspage)
- mov %eax, %ecx
- and $0xfff, %ecx
- cmp $0xff0, %ecx
- ja L(crosspage)
- pxor %xmm0, %xmm0
- movlpd (%eax), %xmm1
- movlpd (%edx), %xmm2
- movhpd 8(%eax), %xmm1
- movhpd 8(%edx), %xmm2
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define TOLOWER(reg1, reg2) \
- movdqa reg1, %xmm5; \
- movdqa reg2, %xmm7; \
- movdqa UCHIGH_reg, %xmm6; \
- pcmpgtb UCLOW_reg, %xmm5; \
- pcmpgtb UCLOW_reg, %xmm7; \
- pcmpgtb reg1, %xmm6; \
- pand %xmm6, %xmm5; \
- movdqa UCHIGH_reg, %xmm6; \
- pcmpgtb reg2, %xmm6; \
- pand %xmm6, %xmm7; \
- pand LCQWORD_reg, %xmm5; \
- por %xmm5, reg1; \
- pand LCQWORD_reg, %xmm7; \
- por %xmm7, reg2
- TOLOWER (%xmm1, %xmm2)
-#else
-# define TOLOWER(reg1, reg2)
-#endif
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %ecx
- sub $0xffff, %ecx
- jnz L(less16bytes)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(eq)
-#endif
- add $16, %eax
- add $16, %edx
-
-L(crosspage):
-
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
- PUSH (FLAGS)
-#endif
- PUSH (%edi)
- PUSH (%esi)
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- pushl $0
- cfi_adjust_cfa_offset (4)
-#endif
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cfi_remember_state
-#endif
-
- movl %edx, %edi
- movl %eax, %ecx
- and $0xf, %ecx
- and $0xf, %edi
- xor %ecx, %eax
- xor %edi, %edx
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
- xor FLAGS, FLAGS
-#endif
- cmp %edi, %ecx
- je L(ashr_0)
- ja L(bigger)
- orl $0x20, FLAGS
- xchg %edx, %eax
- xchg %ecx, %edi
-L(bigger):
- lea 15(%edi), %edi
- sub %ecx, %edi
- cmp $8, %edi
- jle L(ashr_less_8)
- cmp $14, %edi
- je L(ashr_15)
- cmp $13, %edi
- je L(ashr_14)
- cmp $12, %edi
- je L(ashr_13)
- cmp $11, %edi
- je L(ashr_12)
- cmp $10, %edi
- je L(ashr_11)
- cmp $9, %edi
- je L(ashr_10)
-L(ashr_less_8):
- je L(ashr_9)
- cmp $7, %edi
- je L(ashr_8)
- cmp $6, %edi
- je L(ashr_7)
- cmp $5, %edi
- je L(ashr_6)
- cmp $4, %edi
- je L(ashr_5)
- cmp $3, %edi
- je L(ashr_4)
- cmp $2, %edi
- je L(ashr_3)
- cmp $1, %edi
- je L(ashr_2)
- cmp $0, %edi
- je L(ashr_1)
-
-/*
- * The following cases will be handled by ashr_0
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(0~15) n(0~15) 15(15+ n-n) ashr_0
- */
- .p2align 4
-L(ashr_0):
- mov $0xffff, %esi
- movdqa (%eax), %xmm1
- pxor %xmm0, %xmm0
- pcmpeqb %xmm1, %xmm0
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movdqa (%edx), %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm2, %xmm1
-#else
- pcmpeqb (%edx), %xmm1
-#endif
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- mov %ecx, %edi
- jne L(less32bytes)
- UPDATE_STRNCMP_COUNTER
- movl $0x10, FLAGS
- mov $0x10, %ecx
- pxor %xmm0, %xmm0
- .p2align 4
-L(loop_ashr_0):
- movdqa (%eax, %ecx), %xmm1
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- movdqa (%edx, %ecx), %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
-#else
- pcmpeqb %xmm1, %xmm0
- pcmpeqb (%edx, %ecx), %xmm1
-#endif
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- jmp L(loop_ashr_0)
-
-/*
- * The following cases will be handled by ashr_1
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(15) n -15 0(15 +(n-15) - n) ashr_1
- */
- .p2align 4
-L(ashr_1):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $15, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -15(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $1, FLAGS
- lea 1(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_1):
- add $16, %edi
- jg L(nibble_ashr_1)
-
-L(gobble_ashr_1):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $1, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_1)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $1, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_1)
-
- .p2align 4
-L(nibble_ashr_1):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xfffe, %esi
- jnz L(ashr_1_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $15, REM
- jbe L(ashr_1_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_1)
-
- .p2align 4
-L(ashr_1_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $1, %xmm0
- psrldq $1, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_2
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
- */
- .p2align 4
-L(ashr_2):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $14, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -14(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $2, FLAGS
- lea 2(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_2):
- add $16, %edi
- jg L(nibble_ashr_2)
-
-L(gobble_ashr_2):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $2, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_2)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $2, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_2)
-
- .p2align 4
-L(nibble_ashr_2):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xfffc, %esi
- jnz L(ashr_2_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $14, REM
- jbe L(ashr_2_exittail)
-#endif
-
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_2)
-
- .p2align 4
-L(ashr_2_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $2, %xmm0
- psrldq $2, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_3
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
- */
- .p2align 4
-L(ashr_3):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $13, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -13(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $3, FLAGS
- lea 3(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_3):
- add $16, %edi
- jg L(nibble_ashr_3)
-
-L(gobble_ashr_3):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $3, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_3)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $3, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_3)
-
- .p2align 4
-L(nibble_ashr_3):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xfff8, %esi
- jnz L(ashr_3_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $13, REM
- jbe L(ashr_3_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_3)
-
- .p2align 4
-L(ashr_3_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $3, %xmm0
- psrldq $3, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_4
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
- */
- .p2align 4
-L(ashr_4):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $12, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -12(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $4, FLAGS
- lea 4(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_4):
- add $16, %edi
- jg L(nibble_ashr_4)
-
-L(gobble_ashr_4):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $4, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_4)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $4, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_4)
-
- .p2align 4
-L(nibble_ashr_4):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xfff0, %esi
- jnz L(ashr_4_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $12, REM
- jbe L(ashr_4_exittail)
-#endif
-
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_4)
-
- .p2align 4
-L(ashr_4_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $4, %xmm0
- psrldq $4, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_5
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
- */
- .p2align 4
-L(ashr_5):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $11, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -11(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $5, FLAGS
- lea 5(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_5):
- add $16, %edi
- jg L(nibble_ashr_5)
-
-L(gobble_ashr_5):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $5, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_5)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $5, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_5)
-
- .p2align 4
-L(nibble_ashr_5):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xffe0, %esi
- jnz L(ashr_5_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $11, REM
- jbe L(ashr_5_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_5)
-
- .p2align 4
-L(ashr_5_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $5, %xmm0
- psrldq $5, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_6
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
- */
-
- .p2align 4
-L(ashr_6):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $10, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -10(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $6, FLAGS
- lea 6(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_6):
- add $16, %edi
- jg L(nibble_ashr_6)
-
-L(gobble_ashr_6):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $6, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_6)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $6, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_6)
-
- .p2align 4
-L(nibble_ashr_6):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xffc0, %esi
- jnz L(ashr_6_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $10, REM
- jbe L(ashr_6_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_6)
-
- .p2align 4
-L(ashr_6_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $6, %xmm0
- psrldq $6, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_7
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
- */
-
- .p2align 4
-L(ashr_7):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $9, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -9(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $7, FLAGS
- lea 8(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_7):
- add $16, %edi
- jg L(nibble_ashr_7)
-
-L(gobble_ashr_7):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $7, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_7)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $7, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_7)
-
- .p2align 4
-L(nibble_ashr_7):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xff80, %esi
- jnz L(ashr_7_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $9, REM
- jbe L(ashr_7_exittail)
-#endif
- pxor %xmm0, %xmm0
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_7)
-
- .p2align 4
-L(ashr_7_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $7, %xmm0
- psrldq $7, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_8
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
- */
- .p2align 4
-L(ashr_8):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $8, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -8(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $8, FLAGS
- lea 8(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_8):
- add $16, %edi
- jg L(nibble_ashr_8)
-
-L(gobble_ashr_8):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $8, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_8)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $8, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_8)
-
- .p2align 4
-L(nibble_ashr_8):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xff00, %esi
- jnz L(ashr_8_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $8, REM
- jbe L(ashr_8_exittail)
-#endif
- pxor %xmm0, %xmm0
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_8)
-
- .p2align 4
-L(ashr_8_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $8, %xmm0
- psrldq $8, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_9
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
- */
- .p2align 4
-L(ashr_9):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $7, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -7(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $9, FLAGS
- lea 9(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_9):
- add $16, %edi
- jg L(nibble_ashr_9)
-
-L(gobble_ashr_9):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $9, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_9)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $9, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_9)
-
- .p2align 4
-L(nibble_ashr_9):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xfe00, %esi
- jnz L(ashr_9_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $7, REM
- jbe L(ashr_9_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_9)
-
- .p2align 4
-L(ashr_9_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $9, %xmm0
- psrldq $9, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_10
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
- */
- .p2align 4
-L(ashr_10):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $6, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -6(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $10, FLAGS
- lea 10(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_10):
- add $16, %edi
- jg L(nibble_ashr_10)
-
-L(gobble_ashr_10):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $10, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_10)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $10, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_10)
-
- .p2align 4
-L(nibble_ashr_10):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xfc00, %esi
- jnz L(ashr_10_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $6, REM
- jbe L(ashr_10_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_10)
-
- .p2align 4
-L(ashr_10_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $10, %xmm0
- psrldq $10, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_11
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
- */
- .p2align 4
-L(ashr_11):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $5, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -5(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $11, FLAGS
- lea 11(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_11):
- add $16, %edi
- jg L(nibble_ashr_11)
-
-L(gobble_ashr_11):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $11, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_11)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $11, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_11)
-
- .p2align 4
-L(nibble_ashr_11):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xf800, %esi
- jnz L(ashr_11_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $5, REM
- jbe L(ashr_11_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_11)
-
- .p2align 4
-L(ashr_11_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $11, %xmm0
- psrldq $11, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_12
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
- */
- .p2align 4
-L(ashr_12):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $4, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -4(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $12, FLAGS
- lea 12(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_12):
- add $16, %edi
- jg L(nibble_ashr_12)
-
-L(gobble_ashr_12):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $12, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
-
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_12)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $12, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_12)
-
- .p2align 4
-L(nibble_ashr_12):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xf000, %esi
- jnz L(ashr_12_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $4, REM
- jbe L(ashr_12_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_12)
-
- .p2align 4
-L(ashr_12_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $12, %xmm0
- psrldq $12, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_13
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
- */
- .p2align 4
-L(ashr_13):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $3, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -3(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $13, FLAGS
- lea 13(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_13):
- add $16, %edi
- jg L(nibble_ashr_13)
-
-L(gobble_ashr_13):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $13, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_13)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $13, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_13)
-
- .p2align 4
-L(nibble_ashr_13):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xe000, %esi
- jnz L(ashr_13_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $3, REM
- jbe L(ashr_13_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_13)
-
- .p2align 4
-L(ashr_13_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $13, %xmm0
- psrldq $13, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_14
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
- */
- .p2align 4
-L(ashr_14):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $2, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -2(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $14, FLAGS
- lea 14(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_14):
- add $16, %edi
- jg L(nibble_ashr_14)
-
-L(gobble_ashr_14):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $14, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_14)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $14, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_14)
-
- .p2align 4
-L(nibble_ashr_14):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0xc000, %esi
- jnz L(ashr_14_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $2, REM
- jbe L(ashr_14_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_14)
-
- .p2align 4
-L(ashr_14_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $14, %xmm0
- psrldq $14, %xmm3
- jmp L(aftertail)
-
-/*
- * The following cases will be handled by ashr_15
- * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
- * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
- */
-
- .p2align 4
-L(ashr_15):
- mov $0xffff, %esi
- pxor %xmm0, %xmm0
- movdqa (%edx), %xmm2
- movdqa (%eax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $1, %xmm2
- TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
- pmovmskb %xmm2, %edi
- shr %cl, %esi
- shr %cl, %edi
- sub %edi, %esi
- lea -1(%ecx), %edi
- jnz L(less32bytes)
-
- UPDATE_STRNCMP_COUNTER
-
- movdqa (%edx), %xmm3
- pxor %xmm0, %xmm0
- mov $16, %ecx
- orl $15, FLAGS
- lea 15(%edx), %edi
- and $0xfff, %edi
- sub $0x1000, %edi
-
- .p2align 4
-L(loop_ashr_15):
- add $16, %edi
- jg L(nibble_ashr_15)
-
-L(gobble_ashr_15):
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $15, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
-
- add $16, %edi
- jg L(nibble_ashr_15)
-
- movdqa (%eax, %ecx), %xmm1
- movdqa (%edx, %ecx), %xmm2
- movdqa %xmm2, %xmm4
-
- palignr $15, %xmm3, %xmm2
- TOLOWER (%xmm1, %xmm2)
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- sub $0xffff, %esi
- jnz L(exit)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $16, REM
- lea -16(REM), REM
- jbe L(more8byteseq)
-#endif
- add $16, %ecx
- movdqa %xmm4, %xmm3
- jmp L(loop_ashr_15)
-
- .p2align 4
-L(nibble_ashr_15):
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %esi
- test $0x8000, %esi
- jnz L(ashr_15_exittail)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $1, REM
- jbe L(ashr_15_exittail)
-#endif
- pxor %xmm0, %xmm0
- sub $0x1000, %edi
- jmp L(gobble_ashr_15)
-
- .p2align 4
-L(ashr_15_exittail):
- movdqa (%eax, %ecx), %xmm1
- psrldq $15, %xmm0
- psrldq $15, %xmm3
- jmp L(aftertail)
-
- .p2align 4
-L(aftertail):
- TOLOWER (%xmm1, %xmm3)
- pcmpeqb %xmm3, %xmm1
- psubb %xmm0, %xmm1
- pmovmskb %xmm1, %esi
- not %esi
-L(exit):
- mov FLAGS, %edi
- and $0x1f, %edi
- lea -16(%edi, %ecx), %edi
-L(less32bytes):
- add %edi, %edx
- add %ecx, %eax
- testl $0x20, FLAGS
- jz L(ret2)
- xchg %eax, %edx
-
- .p2align 4
-L(ret2):
- mov %esi, %ecx
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- addl $4, %esp
- cfi_adjust_cfa_offset (-4)
-#endif
- POP (%esi)
- POP (%edi)
-#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
- POP (FLAGS)
-#endif
-L(less16bytes):
- test %cl, %cl
- jz L(2next_8_bytes)
-
- test $0x01, %cl
- jnz L(Byte0)
-
- test $0x02, %cl
- jnz L(Byte1)
-
- test $0x04, %cl
- jnz L(Byte2)
-
- test $0x08, %cl
- jnz L(Byte3)
-
- test $0x10, %cl
- jnz L(Byte4)
-
- test $0x20, %cl
- jnz L(Byte5)
-
- test $0x40, %cl
- jnz L(Byte6)
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $7, REM
- jbe L(eq)
-#endif
-
- movzx 7(%eax), %ecx
- movzx 7(%edx), %eax
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(Byte0):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $0, REM
- jbe L(eq)
-#endif
- movzx (%eax), %ecx
- movzx (%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(Byte1):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $1, REM
- jbe L(eq)
-#endif
- movzx 1(%eax), %ecx
- movzx 1(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(Byte2):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $2, REM
- jbe L(eq)
-#endif
- movzx 2(%eax), %ecx
- movzx 2(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(Byte3):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $3, REM
- jbe L(eq)
-#endif
- movzx 3(%eax), %ecx
- movzx 3(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(Byte4):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $4, REM
- jbe L(eq)
-#endif
- movzx 4(%eax), %ecx
- movzx 4(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(Byte5):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $5, REM
- jbe L(eq)
-#endif
- movzx 5(%eax), %ecx
- movzx 5(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(Byte6):
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $6, REM
- jbe L(eq)
-#endif
- movzx 6(%eax), %ecx
- movzx 6(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-L(2next_8_bytes):
- add $8, %eax
- add $8, %edx
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $8, REM
- lea -8(REM), REM
- jbe L(eq)
-#endif
-
- test $0x01, %ch
- jnz L(Byte0)
-
- test $0x02, %ch
- jnz L(Byte1)
-
- test $0x04, %ch
- jnz L(Byte2)
-
- test $0x08, %ch
- jnz L(Byte3)
-
- test $0x10, %ch
- jnz L(Byte4)
-
- test $0x20, %ch
- jnz L(Byte5)
-
- test $0x40, %ch
- jnz L(Byte6)
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- cmp $7, REM
- jbe L(eq)
-#endif
- movzx 7(%eax), %ecx
- movzx 7(%edx), %eax
-
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
-# endif
-#endif
-
- sub %ecx, %eax
- RETURN
-
-#ifdef USE_AS_STRNCMP
-L(neq_sncmp):
-#endif
-L(neq):
- mov $1, %eax
- ja L(neq_bigger)
- neg %eax
-L(neq_bigger):
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
- addl $4, %esp
- cfi_adjust_cfa_offset (-4)
-#endif
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- POP (REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- POP (%ebx)
-# endif
-#endif
- ret
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- .p2align 4
- cfi_restore_state
-L(more8byteseq):
-
-# ifdef USE_AS_STRNCASECMP_L
- addl $4, %esp
- cfi_adjust_cfa_offset (-4)
-# endif
- POP (%esi)
- POP (%edi)
-# ifdef USE_AS_STRNCMP
- POP (FLAGS)
-# endif
-#endif
-
-#ifdef USE_AS_STRNCMP
-L(eq_sncmp):
-#endif
-L(eq):
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- POP (REM)
-#endif
-#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef PIC
- POP (%ebx)
-# endif
-#endif
- xorl %eax, %eax
- ret
-
-#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
- .p2align 4
-# if defined USE_AS_STRNCASECMP_L && defined PIC
- CFI_PUSH (%ebx)
-# endif
- CFI_PUSH (REM)
-L(less16bytes_sncmp):
-# ifdef USE_AS_STRNCASECMP_L
- PUSH (%esi)
-# endif
- test REM, REM
- jz L(eq_sncmp)
-
- movzbl (%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl (%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, (%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $1, REM
- je L(eq_sncmp)
-
- movzbl 1(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 1(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 1(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $2, REM
- je L(eq_sncmp)
-
- movzbl 2(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 2(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 2(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $3, REM
- je L(eq_sncmp)
-
- movzbl 3(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 3(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 3(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $4, REM
- je L(eq_sncmp)
-
- movzbl 4(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 4(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 4(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $5, REM
- je L(eq_sncmp)
-
- movzbl 5(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 5(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 5(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $6, REM
- je L(eq_sncmp)
-
- movzbl 6(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 6(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 6(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $7, REM
- je L(eq_sncmp)
-
- movzbl 7(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 7(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 7(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
-
- cmp $8, REM
- je L(eq_sncmp)
-
- movzbl 8(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 8(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 8(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $9, REM
- je L(eq_sncmp)
-
- movzbl 9(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 9(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 9(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $10, REM
- je L(eq_sncmp)
-
- movzbl 10(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 10(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 10(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $11, REM
- je L(eq_sncmp)
-
- movzbl 11(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 11(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 11(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
-
- cmp $12, REM
- je L(eq_sncmp)
-
- movzbl 12(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 12(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 12(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $13, REM
- je L(eq_sncmp)
-
- movzbl 13(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 13(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 13(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $14, REM
- je L(eq_sncmp)
-
- movzbl 14(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 14(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 14(%edx)
-# endif
- jne L(neq_sncmp)
- test %cl, %cl
- je L(eq_sncmp)
-
- cmp $15, REM
- je L(eq_sncmp)
-
- movzbl 15(%eax), %ecx
-# ifdef USE_AS_STRNCASECMP_L
- movzbl 15(%edx), %esi
-# ifdef PIC
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
-# else
- movl _nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
- movl _nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
-# endif
- cmpl %ecx, %esi
-# else
- cmpb %cl, 15(%edx)
-# endif
- jne L(neq_sncmp)
-
-# ifdef USE_AS_STRNCASECMP_L
-L(eq_sncmp):
- POP (%esi)
-# endif
- POP (REM)
-# if defined USE_AS_STRNCASECMP_L && defined PIC
- POP (%ebx)
-# endif
- xor %eax, %eax
- ret
-
-# ifdef USE_AS_STRNCASECMP_L
- .p2align 4
-# ifdef PIC
- CFI_PUSH (%ebx)
-# endif
- CFI_PUSH (REM)
- CFI_PUSH (%esi)
-L(neq_sncmp):
- mov $1, %eax
- mov $-1, %edx
- cmovna %edx, %eax
- POP (%esi)
- POP (REM)
-# ifdef PIC
- POP (%ebx)
-# endif
- ret
-# endif
-#endif
-
-END (STRCMP)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
deleted file mode 100644
index 56de25a4b7..0000000000
--- a/sysdeps/i386/i686/multiarch/strcmp.S
+++ /dev/null
@@ -1,95 +0,0 @@
-/* Multiple versions of strcmp
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRNCMP /* Select the public, __GI_ and per-ISA symbol names for the variant being built.  */
-# define STRCMP strncmp
-# define __GI_STRCMP __GI_strncmp
-# define __STRCMP_IA32 __strncmp_ia32
-# define __STRCMP_SSSE3 __strncmp_ssse3
-# define __STRCMP_SSE4_2 __strncmp_sse4_2
-#elif defined USE_AS_STRCASECMP_L /* Names for the __strcasecmp_l build.  */
-# define STRCMP __strcasecmp_l
-# define __GI_STRCMP __GI_strcasecmp_l
-# define __STRCMP_IA32 __strcasecmp_l_ia32
-# define __STRCMP_SSSE3 __strcasecmp_l_ssse3
-# define __STRCMP_SSE4_2 __strcasecmp_l_sse4_2
-#elif defined USE_AS_STRNCASECMP_L /* Names for the __strncasecmp_l build.  */
-# define STRCMP __strncasecmp_l
-# define __GI_STRCMP __GI_strncasecmp_l
-# define __STRCMP_IA32 __strncasecmp_l_ia32
-# define __STRCMP_SSSE3 __strncasecmp_l_ssse3
-# define __STRCMP_SSE4_2 __strncasecmp_l_sse4_2
-#else /* No USE_AS_* macro defined: names for plain strcmp.  */
-# define STRCMP strcmp
-# define __GI_STRCMP __GI_strcmp
-# define __STRCMP_IA32 __strcmp_ia32
-# define __STRCMP_SSSE3 __strcmp_ssse3
-# define __STRCMP_SSE4_2 __strcmp_sse4_2
-#endif
-
-/* Define multiple versions only for the definition in libc. Don't
- define multiple versions for strncmp in the static library, since
- strncmp is needed before initialization has happened. */
-#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
- .text
-ENTRY(STRCMP) /* Resolver: picks one of the IA32, SSSE3 or SSE4_2 versions.  */
- .type STRCMP, @gnu_indirect_function /* Mark STRCMP as a GNU indirect function.  */
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__STRCMP_IA32) /* Start with the IA32 version as the result (presumably held in EAX).  */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* SSSE3 test gave zero (presumably feature absent): keep the IA32 version.  */
- LOAD_FUNC_GOT_EAX (__STRCMP_SSSE3) /* Upgrade the result to the SSSE3 version.  */
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f /* SSE4_2 test gave zero: keep the SSSE3 version.  */
- HAS_ARCH_FEATURE (Slow_SSE4_2)
- jnz 2f /* Slow_SSE4_2 test gave non-zero: keep the SSSE3 version.  */
- LOAD_FUNC_GOT_EAX (__STRCMP_SSE4_2) /* Otherwise select the SSE4_2 version.  */
-2: ret /* Common exit for every selection path.  */
-END(STRCMP)
-
-# undef ENTRY /* From here on ENTRY ignores its argument and always defines __STRCMP_IA32.  */
-# define ENTRY(name) \
- .type __STRCMP_IA32, @function; \
- .p2align 4; \
- .globl __STRCMP_IA32; \
- .hidden __STRCMP_IA32; \
- __STRCMP_IA32: cfi_startproc; \
- CALL_MCOUNT
-# undef END /* Likewise END always closes __STRCMP_IA32, whatever name is passed.  */
-# define END(name) \
- cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with hidden functions in the shared library, since
- they would be called without setting up EBX, which is needed for the
- PLT that IFUNC uses. So alias __GI_STRCMP directly to the IA32 version. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32
-# endif
-#endif
-
-#if !defined USE_AS_STRNCMP && !defined USE_AS_STRCASECMP_L \
- && !defined USE_AS_STRNCASECMP_L
-# include "../strcmp.S" /* Only the plain strcmp build includes the parent directory's strcmp.S here.  */
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
deleted file mode 100644
index ed627a5f62..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S
+++ /dev/null
@@ -1,2250 +0,0 @@
-/* strcpy with SSE2 and unaligned load
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-/* CFI_PUSH: unwind annotation for a 4-byte push of REG (CFA offset +4, REG saved at relative offset 0). */
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-/* CFI_POP: the inverse annotation for a 4-byte pop of REG. */
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-/* PUSH/POP pair the real pushl/popl with the matching CFI annotation. */
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCPY
-# define STRCPY __strcpy_sse2 /* default symbol name when none was chosen beforehand */
-# endif
-/* Stack offsets of the arguments, relative to ESP after ENTRANCE has run. */
-# define STR1 PARMS /* first argument: loaded as the destination */
-# define STR2 STR1+4 /* second argument: loaded as the source */
-# define LEN STR2+4 /* third argument: loaded as the byte count */
-
-# ifdef USE_AS_STRNCPY
-# define PARMS 16 /* 4-byte return address plus the three registers pushed by ENTRANCE */
-# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
-# define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
- CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
-/* RETURN re-issues the three CFI_PUSH annotations after ret so the code that follows keeps the pushed-register unwind state. */
-# ifdef SHARED
-# define JMPTBL(I, B) I - B /* entry = label I relative to table base B */
-
-/* Load an entry of a jump table into ECX and branch to it. TABLE is a
- jump table with relative offsets.
- INDEX is a register containing the index into the jump table.
- SCALE is the scale of INDEX. ECX is clobbered. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- /* We first load PC into ECX. */ \
- SETUP_PIC_REG(cx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ecx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ecx,INDEX,SCALE), %ecx; \
- /* We loaded the jump table and adjusted ECX. Go. */ \
- jmp *%ecx
-# else
-# define JMPTBL(I, B) I /* entry = absolute address of label I */
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute offsets. INDEX is a register containing the index into the
- jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-# endif
-/* Count-bounded copy (USE_AS_STRNCPY build): EDI = destination, ESI = source, EBX = byte count. */
-.text
-ENTRY (STRCPY)
- ENTRANCE
- mov STR1(%esp), %edi /* EDI = destination */
- mov STR2(%esp), %esi /* ESI = source */
- movl LEN(%esp), %ebx /* EBX = byte count */
- test %ebx, %ebx
- jz L(ExitZero) /* nothing to copy when the count is zero */
-
- mov %esi, %ecx
-# ifndef USE_AS_STPCPY
- mov %edi, %eax /* save result */
-# endif
- and $15, %ecx /* ECX = offset of the source inside its 16-byte block */
- jz L(SourceStringAlignmentZero) /* source is already 16-byte aligned */
-/* Misaligned source: scan the enclosing aligned 16-byte block for a NUL. */
- and $-16, %esi
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
-
- pcmpeqb (%esi), %xmm1 /* 0xff in every byte lane that holds a NUL */
- add %ecx, %ebx /* count is now measured from the aligned block start */
- pmovmskb %xmm1, %edx
- shr %cl, %edx /* drop mask bits for bytes before the real source start */
-# ifdef USE_AS_STPCPY
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTailCase2OrCase3)
-# else
- cmp $17, %ebx
- jbe L(CopyFrom1To16BytesTailCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail)
-/* No NUL in the first block: test the next aligned 16 bytes. */
- pcmpeqb 16(%esi), %xmm0
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
- cmp $32, %ebx
- jbe L(CopyFrom1To32BytesCase2OrCase3)
-# else
- cmp $33, %ebx
- jbe L(CopyFrom1To32BytesCase2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes)
-/* EDX == 0 here, so the compare left XMM0 all-zero for the later NUL tests. */
- movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
- movdqu %xmm1, (%edi)
-
- sub %ecx, %edi /* bias EDI so that EDI+k pairs with ESI+k */
-
-/* If source address alignment != destination address alignment */
- .p2align 4
-L(Unalign16Both): /* unrolled: aligned 16-byte loads, unaligned stores, NUL test on the block loaded ahead */
- mov $16, %ecx /* ECX = running offset applied to both ESI and EDI */
- movdqa (%esi, %ecx), %xmm1
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%edi, %ecx) /* this block was already tested for NUL before entry */
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $48, %ebx /* EBX = bytes still allowed beyond the block just tested */
- jbe L(CopyFrom1To16BytesCase2OrCase3) /* count limit falls inside the tested block */
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm2) /* NUL found in xmm2 */
-
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%edi, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm3)
-
- movaps 16(%esi, %ecx), %xmm4
- movdqu %xmm3, (%edi, %ecx)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm4)
-
- movaps 16(%esi, %ecx), %xmm1
- movdqu %xmm4, (%edi, %ecx)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm1)
-
- movaps 16(%esi, %ecx), %xmm2
- movdqu %xmm1, (%edi, %ecx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm2)
-
- movaps 16(%esi, %ecx), %xmm3
- movdqu %xmm2, (%edi, %ecx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %edx
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm3)
-/* No NUL so far: round the source position down to a 64-byte boundary and rebase EDI and EBX by the same amount. */
- movdqu %xmm3, (%edi, %ecx)
- mov %esi, %edx
- lea 16(%esi, %ecx), %esi
- and $-0x40, %esi
- sub %esi, %edx /* EDX = old ESI minus new ESI */
- sub %edx, %edi
- lea 128(%ebx, %edx), %ebx
-
-L(Unaligned64Loop): /* test 64 aligned source bytes per pass; copies are kept in xmm4..xmm7 */
- movaps (%esi), %xmm2
- movaps %xmm2, %xmm4
- movaps 16(%esi), %xmm5
- movaps 32(%esi), %xmm3
- movaps %xmm3, %xmm6
- movaps 48(%esi), %xmm7
- pminub %xmm5, %xmm2
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3 /* lane-wise minimum of all four blocks: a zero lane means some block has a NUL */
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3) /* count limit reached within these 64 bytes */
- test %edx, %edx
- jnz L(Unaligned64Leave)
-L(Unaligned64Loop_start): /* no NUL, count not exhausted: store the saved 64 bytes while loading the next 64 */
- add $64, %edi
- add $64, %esi
- movdqu %xmm4, -64(%edi)
- movaps (%esi), %xmm2
- movdqa %xmm2, %xmm4
- movdqu %xmm5, -48(%edi)
- movaps 16(%esi), %xmm5
- pminub %xmm5, %xmm2
- movaps 32(%esi), %xmm3
- movdqu %xmm6, -32(%edi)
- movaps %xmm3, %xmm6
- movdqu %xmm7, -16(%edi)
- movaps 48(%esi), %xmm7
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %edx
- sub $64, %ebx
- jbe L(UnalignedLeaveCase2OrCase3)
- test %edx, %edx
- jz L(Unaligned64Loop_start)
-L(Unaligned64Leave): /* a NUL lies somewhere in xmm4..xmm7: find the 16-byte block */
- pxor %xmm1, %xmm1
-/* EDX = NUL mask of xmm4, ECX = NUL mask of xmm5. */
- pcmpeqb %xmm4, %xmm0
- pcmpeqb %xmm5, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_0)
- test %ecx, %ecx
- jnz L(CopyFrom1To16BytesUnaligned_16)
-/* Both masks were zero, so XMM0 and XMM1 are all-zero again; repeat for xmm6 and xmm7. */
- pcmpeqb %xmm6, %xmm0
- pcmpeqb %xmm7, %xmm1
- pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %ecx
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnaligned_32)
-/* Otherwise the NUL is in the last block (xmm7), whose mask is in ECX. */
- bsf %ecx, %edx
- movdqu %xmm4, (%edi)
- movdqu %xmm5, 16(%edi)
- movdqu %xmm6, 32(%edi)
-# ifdef USE_AS_STPCPY
- lea 48(%edi, %edx), %eax
-# endif
- movdqu %xmm7, 48(%edi)
- add $15, %ebx
- sub %edx, %ebx
- lea 49(%edi, %edx), %edi /* EDI = first byte after the copied NUL */
- jmp L(StrncpyFillTailWithZero)
-
-/* Source is 16-byte aligned (ECX == 0): no leading bytes to mask off */
-
-L(SourceStringAlignmentZero):
- pxor %xmm0, %xmm0
- movdqa (%esi), %xmm1
- pcmpeqb %xmm1, %xmm0 /* NUL mask of the first 16 source bytes */
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
- cmp $16, %ebx
- jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
-# else
- cmp $17, %ebx
- jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1)
-/* First block is NUL-free: store it and test the second block. */
- pcmpeqb 16(%esi), %xmm0
- movdqu %xmm1, (%edi)
- pmovmskb %xmm0, %edx
-# ifdef USE_AS_STPCPY
- cmp $32, %ebx
- jbe L(CopyFrom1To32Bytes1Case2OrCase3)
-# else
- cmp $33, %ebx
- jbe L(CopyFrom1To32Bytes1Case2OrCase3)
-# endif
- test %edx, %edx
- jnz L(CopyFrom1To32Bytes1)
-
- jmp L(Unalign16Both) /* continue in the shared unrolled loop */
-
-/*-----------------End of main part---------------------------*/
-
-/* Case1: a NUL was found and the count check did not trigger */
- .p2align 4
-L(CopyFrom1To16BytesTail):
- sub %ecx, %ebx /* undo the alignment bias on the count */
- add %ecx, %esi /* ESI back to the true source start */
- bsf %edx, %edx /* EDX = index of the NUL */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) /* entry k of ExitTable is L(Exit k+1) */
-
- .p2align 4
-L(CopyFrom1To32Bytes1):
- add $16, %esi
- add $16, %edi
- sub $16, %ebx
-L(CopyFrom1To16BytesTail1):
- bsf %edx, %edx
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes):
- sub %ecx, %ebx
- bsf %edx, %edx
- add %ecx, %esi
- add $16, %edx
- sub %ecx, %edx /* NUL index relative to the true source start */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_0): /* NUL is in xmm4, the first block of the 64-byte group */
- bsf %edx, %edx
-# ifdef USE_AS_STPCPY
- lea (%edi, %edx), %eax
-# endif
- movdqu %xmm4, (%edi)
- add $63, %ebx
- sub %edx, %ebx
- lea 1(%edi, %edx), %edi /* EDI = first byte after the NUL */
- jmp L(StrncpyFillTailWithZero)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_16): /* NUL is in xmm5; its mask arrives in ECX */
- bsf %ecx, %edx
- movdqu %xmm4, (%edi)
-# ifdef USE_AS_STPCPY
- lea 16(%edi, %edx), %eax
-# endif
- movdqu %xmm5, 16(%edi)
- add $47, %ebx
- sub %edx, %ebx
- lea 17(%edi, %edx), %edi
- jmp L(StrncpyFillTailWithZero)
-
- .p2align 4
-L(CopyFrom1To16BytesUnaligned_32): /* NUL is in xmm6 */
- bsf %edx, %edx
- movdqu %xmm4, (%edi)
- movdqu %xmm5, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 32(%edi, %edx), %eax
-# endif
- movdqu %xmm6, 32(%edi)
- add $31, %ebx
- sub %edx, %ebx
- lea 33(%edi, %edx), %edi
- jmp L(StrncpyFillTailWithZero)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm6): /* the XmmN stubs store the block holding the NUL, then share one exit */
- movdqu %xmm6, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm5):
- movdqu %xmm5, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm4):
- movdqu %xmm4, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm3):
- movdqu %xmm3, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm1):
- movdqu %xmm1, (%edi, %ecx)
- jmp L(CopyFrom1To16BytesXmmExit)
-
- .p2align 4
-L(CopyFrom1To16BytesExit): /* EDX already holds the NUL index */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
-
-/* Case2: a NUL is present and the count limit is near; whichever comes first decides */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %ebx
- add %ecx, %edi
- add %ecx, %esi
- bsf %edx, %edx /* EDX = index of the NUL */
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit) /* NUL index below the count: copy through the NUL */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) /* otherwise copy exactly EBX bytes */
-
- .p2align 4
-L(CopyFrom1To32BytesCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- add $16, %edx
- sub %ecx, %edx /* NUL index relative to the true source start */
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTailCase2):
- sub %ecx, %ebx
- add %ecx, %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-L(CopyFrom1To16BytesTail1Case2):
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
-/* Case2 or Case3: Case3 means the count ran out with no NUL in the mask */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesCase2) /* a NUL bit is set: Case2 */
-L(CopyFrom1To16BytesCase3):
- add $16, %ebx
- add %ecx, %edi
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) /* copy the last EBX bytes, no terminator */
-
- .p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To32BytesCase2)
- sub %ecx, %ebx /* undo the alignment bias on the count */
- add %ecx, %esi /* ESI back to the true source start */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTailCase2)
- sub %ecx, %ebx
- add %ecx, %esi
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
- add $16, %edi
- add $16, %esi
- sub $16, %ebx
-L(CopyFrom1To16BytesTail1Case2OrCase3):
- test %edx, %edx
- jnz L(CopyFrom1To16BytesTail1Case2)
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
-/* L(ExitN), N >= 1: copy the N-byte string (NUL included) from ESI to EDI, then zero-fill when count remains. */
- .p2align 4
-L(Exit0): /* entry 0 of ExitStrncpyTable: nothing left to copy */
-# ifdef USE_AS_STPCPY
- mov %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit1):
- movb %dh, (%edi) /* DH is zero: EDX is the table index N-1, at most 31 */
-# ifdef USE_AS_STPCPY
- lea (%edi), %eax /* USE_AS_STPCPY result: address of the copied NUL */
-# endif
- sub $1, %ebx /* EBX = count left after the string */
- lea 1(%edi), %edi /* lea keeps the flags of the sub for the jnz below */
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit2):
- movw (%esi), %dx
- movw %dx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 1(%edi), %eax
-# endif
- sub $2, %ebx
- lea 2(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit3):
- movw (%esi), %cx
- movw %cx, (%edi)
- movb %dh, 2(%edi) /* terminator written from the zero DH */
-# ifdef USE_AS_STPCPY
- lea 2(%edi), %eax
-# endif
- sub $3, %ebx
- lea 3(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit4):
- movl (%esi), %edx
- movl %edx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 3(%edi), %eax
-# endif
- sub $4, %ebx
- lea 4(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit5):
- movl (%esi), %ecx
- movb %dh, 4(%edi)
- movl %ecx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 4(%edi), %eax
-# endif
- sub $5, %ebx
- lea 5(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit6):
- movl (%esi), %ecx
- movw 4(%esi), %dx
- movl %ecx, (%edi)
- movw %dx, 4(%edi)
-# ifdef USE_AS_STPCPY
- lea 5(%edi), %eax
-# endif
- sub $6, %ebx
- lea 6(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit7):
- movl (%esi), %ecx
- movl 3(%esi), %edx /* two overlapping 4-byte moves cover the 7 bytes */
- movl %ecx, (%edi)
- movl %edx, 3(%edi)
-# ifdef USE_AS_STPCPY
- lea 6(%edi), %eax
-# endif
- sub $7, %ebx
- lea 7(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit8):
- movlpd (%esi), %xmm0
- movlpd %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 7(%edi), %eax
-# endif
- sub $8, %ebx
- lea 8(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit9):
- movlpd (%esi), %xmm0
- movb %dh, 8(%edi)
- movlpd %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 8(%edi), %eax
-# endif
- sub $9, %ebx
- lea 9(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit10):
- movlpd (%esi), %xmm0
- movw 8(%esi), %dx
- movlpd %xmm0, (%edi)
- movw %dx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 9(%edi), %eax
-# endif
- sub $10, %ebx
- lea 10(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit11):
- movlpd (%esi), %xmm0
- movl 7(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 10(%edi), %eax
-# endif
- sub $11, %ebx
- lea 11(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit12):
- movlpd (%esi), %xmm0
- movl 8(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 11(%edi), %eax
-# endif
- sub $12, %ebx
- lea 12(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit13):
- movlpd (%esi), %xmm0
- movlpd 5(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
- lea 12(%edi), %eax
-# endif
- sub $13, %ebx
- lea 13(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit14):
- movlpd (%esi), %xmm0
- movlpd 6(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
- lea 13(%edi), %eax
-# endif
- sub $14, %ebx
- lea 14(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit15):
- movlpd (%esi), %xmm0
- movlpd 7(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 14(%edi), %eax
-# endif
- sub $15, %ebx
- lea 15(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit16):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 15(%edi), %eax
-# endif
- sub $16, %ebx
- lea 16(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit17):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%edi)
- movb %dh, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 16(%edi), %eax
-# endif
- sub $17, %ebx
- lea 17(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit18):
- movdqu (%esi), %xmm0
- movw 16(%esi), %cx
- movdqu %xmm0, (%edi)
- movw %cx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 17(%edi), %eax
-# endif
- sub $18, %ebx
- lea 18(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit19):
- movdqu (%esi), %xmm0
- movl 15(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 18(%edi), %eax
-# endif
- sub $19, %ebx
- lea 19(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit20):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 19(%edi), %eax
-# endif
- sub $20, %ebx
- lea 20(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit21):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
- movb %dh, 20(%edi)
-# ifdef USE_AS_STPCPY
- lea 20(%edi), %eax
-# endif
- sub $21, %ebx
- lea 21(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit22):
- movdqu (%esi), %xmm0
- movlpd 14(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 21(%edi), %eax
-# endif
- sub $22, %ebx
- lea 22(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit23):
- movdqu (%esi), %xmm0
- movlpd 15(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 22(%edi), %eax
-# endif
- sub $23, %ebx
- lea 23(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit24):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 23(%edi), %eax
-# endif
- sub $24, %ebx
- lea 24(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit25):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movb %dh, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 24(%edi), %eax
-# endif
- sub $25, %ebx
- lea 25(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit26):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movw 24(%esi), %cx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movw %cx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 25(%edi), %eax
-# endif
- sub $26, %ebx
- lea 26(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit27):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 23(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
- lea 26(%edi), %eax
-# endif
- sub $27, %ebx
- lea 27(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit28):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 24(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 27(%edi), %eax
-# endif
- sub $28, %ebx
- lea 28(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit29):
- movdqu (%esi), %xmm0
- movdqu 13(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
- lea 28(%edi), %eax
-# endif
- sub $29, %ebx
- lea 29(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit30):
- movdqu (%esi), %xmm0
- movdqu 14(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 29(%edi), %eax
-# endif
- sub $30, %ebx
- lea 30(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
-
- .p2align 4
-L(Exit31):
- movdqu (%esi), %xmm0
- movdqu 15(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 30(%edi), %eax
-# endif
- sub $31, %ebx
- lea 31(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-
- .p2align 4
-L(Exit32):
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 31(%edi), %eax
-# endif
- sub $32, %ebx
- lea 32(%edi), %edi
- jnz L(StrncpyFillTailWithZero)
- RETURN
-/* L(StrncpyExitN): the count ends first; copy exactly N bytes from ESI to EDI, with no terminator and no padding. */
- .p2align 4
-L(StrncpyExit1):
- movb (%esi), %dl
- movb %dl, (%edi)
-# ifdef USE_AS_STPCPY
- lea 1(%edi), %eax /* USE_AS_STPCPY result: one past the last byte written */
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit2):
- movw (%esi), %dx
- movw %dx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 2(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit3):
- movw (%esi), %cx
- movb 2(%esi), %dl
- movw %cx, (%edi)
- movb %dl, 2(%edi)
-# ifdef USE_AS_STPCPY
- lea 3(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit4):
- movl (%esi), %edx
- movl %edx, (%edi)
-# ifdef USE_AS_STPCPY
- lea 4(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit5):
- movl (%esi), %ecx
- movb 4(%esi), %dl
- movl %ecx, (%edi)
- movb %dl, 4(%edi)
-# ifdef USE_AS_STPCPY
- lea 5(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit6):
- movl (%esi), %ecx
- movw 4(%esi), %dx
- movl %ecx, (%edi)
- movw %dx, 4(%edi)
-# ifdef USE_AS_STPCPY
- lea 6(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit7):
- movl (%esi), %ecx
- movl 3(%esi), %edx /* two overlapping 4-byte moves cover the 7 bytes */
- movl %ecx, (%edi)
- movl %edx, 3(%edi)
-# ifdef USE_AS_STPCPY
- lea 7(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit8):
- movlpd (%esi), %xmm0
- movlpd %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 8(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit9):
- movlpd (%esi), %xmm0
- movb 8(%esi), %dl
- movlpd %xmm0, (%edi)
- movb %dl, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 9(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit10):
- movlpd (%esi), %xmm0
- movw 8(%esi), %dx
- movlpd %xmm0, (%edi)
- movw %dx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 10(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit11):
- movlpd (%esi), %xmm0
- movl 7(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 11(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit12):
- movlpd (%esi), %xmm0
- movl 8(%esi), %edx
- movlpd %xmm0, (%edi)
- movl %edx, 8(%edi)
-# ifdef USE_AS_STPCPY
- lea 12(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit13):
- movlpd (%esi), %xmm0
- movlpd 5(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 5(%edi)
-# ifdef USE_AS_STPCPY
- lea 13(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit14):
- movlpd (%esi), %xmm0
- movlpd 6(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 6(%edi)
-# ifdef USE_AS_STPCPY
- lea 14(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit15):
- movlpd (%esi), %xmm0
- movlpd 7(%esi), %xmm1
- movlpd %xmm0, (%edi)
- movlpd %xmm1, 7(%edi)
-# ifdef USE_AS_STPCPY
- lea 15(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit16):
- movdqu (%esi), %xmm0
- movdqu %xmm0, (%edi)
-# ifdef USE_AS_STPCPY
- lea 16(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit17):
- movdqu (%esi), %xmm0
- movb 16(%esi), %cl
- movdqu %xmm0, (%edi)
- movb %cl, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 17(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit18):
- movdqu (%esi), %xmm0
- movw 16(%esi), %cx
- movdqu %xmm0, (%edi)
- movw %cx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 18(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit19):
- movdqu (%esi), %xmm0
- movl 15(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 19(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit20):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 20(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit21):
- movdqu (%esi), %xmm0
- movl 16(%esi), %ecx
- movb 20(%esi), %dl
- movdqu %xmm0, (%edi)
- movl %ecx, 16(%edi)
- movb %dl, 20(%edi)
-# ifdef USE_AS_STPCPY
- lea 21(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit22):
- movdqu (%esi), %xmm0
- movlpd 14(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 22(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit23):
- movdqu (%esi), %xmm0
- movlpd 15(%esi), %xmm3
- movdqu %xmm0, (%edi)
- movlpd %xmm3, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 23(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit24):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 24(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit25):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movb 24(%esi), %cl
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movb %cl, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 25(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit26):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movw 24(%esi), %cx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movw %cx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 26(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit27):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 23(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 23(%edi)
-# ifdef USE_AS_STPCPY
- lea 27(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit28):
- movdqu (%esi), %xmm0
- movlpd 16(%esi), %xmm2
- movl 24(%esi), %ecx
- movdqu %xmm0, (%edi)
- movlpd %xmm2, 16(%edi)
- movl %ecx, 24(%edi)
-# ifdef USE_AS_STPCPY
- lea 28(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit29):
- movdqu (%esi), %xmm0
- movdqu 13(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 13(%edi)
-# ifdef USE_AS_STPCPY
- lea 29(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit30):
- movdqu (%esi), %xmm0
- movdqu 14(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 14(%edi)
-# ifdef USE_AS_STPCPY
- lea 30(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit31):
- movdqu (%esi), %xmm0
- movdqu 15(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 15(%edi)
-# ifdef USE_AS_STPCPY
- lea 31(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit32):
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 16(%edi)
-# ifdef USE_AS_STPCPY
- lea 32(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit33): /* NOTE(review): sets no USE_AS_STPCPY result; presumably unreachable there, since that build compares against 16/32 rather than 17/33 */
- movdqu (%esi), %xmm0
- movdqu 16(%esi), %xmm2
- movb 32(%esi), %cl
- movdqu %xmm0, (%edi)
- movdqu %xmm2, 16(%edi)
- movb %cl, 32(%edi)
- RETURN
-/* L(FillN): finish the zero padding so it ends N bytes past EDI; EDX and XMM0 are zeroed by L(StrncpyFillTailWithZero). */
- .p2align 4
-L(Fill0):
- RETURN
-
- .p2align 4
-L(Fill1):
- movb %dl, (%edi)
- RETURN
-
- .p2align 4
-L(Fill2):
- movw %dx, (%edi)
- RETURN
-
- .p2align 4
-L(Fill3):
- movl %edx, -1(%edi) /* 4-byte store starting one byte early; presumably that byte is already zero */
- RETURN
-
- .p2align 4
-L(Fill4):
- movl %edx, (%edi)
- RETURN
-
- .p2align 4
-L(Fill5):
- movl %edx, (%edi)
- movb %dl, 4(%edi)
- RETURN
-
- .p2align 4
-L(Fill6):
- movl %edx, (%edi)
- movw %dx, 4(%edi)
- RETURN
-
- .p2align 4
-L(Fill7):
- movlpd %xmm0, -1(%edi) /* 8-byte store starting one byte early, as in Fill3 */
- RETURN
-
- .p2align 4
-L(Fill8):
- movlpd %xmm0, (%edi)
- RETURN
-
- .p2align 4
-L(Fill9):
- movlpd %xmm0, (%edi)
- movb %dl, 8(%edi)
- RETURN
-
- .p2align 4
-L(Fill10):
- movlpd %xmm0, (%edi)
- movw %dx, 8(%edi)
- RETURN
-
- .p2align 4
-L(Fill11):
- movlpd %xmm0, (%edi)
- movl %edx, 7(%edi) /* overlaps the previous store by one byte */
- RETURN
-
- .p2align 4
-L(Fill12):
- movlpd %xmm0, (%edi)
- movl %edx, 8(%edi)
- RETURN
-
- .p2align 4
-L(Fill13):
- movlpd %xmm0, (%edi)
- movlpd %xmm0, 5(%edi)
- RETURN
-
- .p2align 4
-L(Fill14):
- movlpd %xmm0, (%edi)
- movlpd %xmm0, 6(%edi)
- RETURN
-
- .p2align 4
-L(Fill15):
- movdqu %xmm0, -1(%edi) /* 16-byte store starting one byte early, as in Fill3 */
- RETURN
-
- .p2align 4
-L(Fill16):
- movdqu %xmm0, (%edi)
- RETURN
-
- .p2align 4
-L(CopyFrom1To16BytesUnalignedXmm2):
- movdqu %xmm2, (%edi, %ecx) /* falls through into the common exit below */
-
- .p2align 4
-L(CopyFrom1To16BytesXmmExit):
- bsf %edx, %edx /* EDX = NUL index inside the block just stored */
- add $15, %ebx
- add %ecx, %edi
-# ifdef USE_AS_STPCPY
- lea (%edi, %edx), %eax
-# endif
- sub %edx, %ebx
- lea 1(%edi, %edx), %edi /* EDI = first byte after the NUL */
-/* Falls through into the zero fill. */
- .p2align 4
-L(StrncpyFillTailWithZero): /* pad the destination with zeros from EDI; EBX is the byte count the callers left to write */
- pxor %xmm0, %xmm0
- xor %edx, %edx
- sub $16, %ebx
- jbe L(StrncpyFillExit) /* 16 bytes or fewer: one table-driven store finishes */
-
- movdqu %xmm0, (%edi) /* first 16 zeros, unaligned */
- add $16, %edi
-/* Round EDI down to a 16-byte boundary so later stores can use movdqa; EBX grows by the overlap. ESI is scratch here. */
- mov %edi, %esi
- and $0xf, %esi
- sub %esi, %edi
- add %esi, %ebx
- sub $64, %ebx
- jb L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa): /* 64 zero bytes per pass */
- movdqa %xmm0, (%edi)
- movdqa %xmm0, 16(%edi)
- movdqa %xmm0, 32(%edi)
- movdqa %xmm0, 48(%edi)
- add $64, %edi
- sub $64, %ebx
- jae L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
- add $32, %ebx
- jl L(StrncpyFillLess32)
- movdqa %xmm0, (%edi)
- movdqa %xmm0, 16(%edi)
- add $32, %edi
- sub $16, %ebx
- jl L(StrncpyFillExit)
- movdqa %xmm0, (%edi)
- add $16, %edi
- BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillLess32):
- add $16, %ebx
- jl L(StrncpyFillExit)
- movdqa %xmm0, (%edi)
- add $16, %edi
- BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
-L(StrncpyFillExit):
- add $16, %ebx /* rebias EBX into the FillTable index range */
- BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
-
- .p2align 4
-L(UnalignedLeaveCase2OrCase3): /* count limit reached inside the 64-byte group held in xmm4..xmm7 */
- test %edx, %edx
- jnz L(Unaligned64LeaveCase2)
-L(Unaligned64LeaveCase3): /* no NUL in the group: store whole blocks while the count allows */
- lea 64(%ebx), %ecx
- and $-16, %ecx /* presumably 64+EBX is the byte count left in this group; ECX rounds it down to a multiple of 16 */
- add $48, %ebx
- jl L(CopyFrom1To16BytesCase3)
- movdqu %xmm4, (%edi)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm5, 16(%edi)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm6, 32(%edi)
- sub $16, %ebx
- jb L(CopyFrom1To16BytesCase3)
- movdqu %xmm7, 48(%edi)
-# ifdef USE_AS_STPCPY
- lea 64(%edi), %eax
-# endif
- RETURN
-
- .p2align 4
-L(Unaligned64LeaveCase2): /* a NUL is in the group: walk the four blocks, tracking both NUL and count */
- xor %ecx, %ecx /* ECX = offset of the block being examined */
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %edx
- add $48, %ebx
- jle L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm4)
-
- pcmpeqb %xmm5, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm4, (%edi)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm5)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm5, 16(%edi)
- add $16, %ecx
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %edx, %edx
- jnz L(CopyFrom1To16BytesUnalignedXmm6)
-
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %edx
- movdqu %xmm6, 32(%edi)
- lea 16(%edi, %ecx), %edi
- lea 16(%esi, %ecx), %esi
- bsf %edx, %edx
- cmp %ebx, %edx
- jb L(CopyFrom1To16BytesExit) /* NUL index below the count: copy through the NUL */
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) /* otherwise copy exactly EBX bytes */
-
- .p2align 4
-L(ExitZero):
- movl %edi, %eax /* zero count: return the destination pointer, nothing written */
- RETURN
-
-END (STRCPY)
-/* Jump tables for BRANCH_TO_JMPTBL_ENTRY; JMPTBL makes entries table-relative under SHARED and absolute otherwise. */
- .p2align 4
- .section .rodata
-L(ExitTable): /* entry k is L(Exit k+1): the index is the NUL position */
- .int JMPTBL(L(Exit1), L(ExitTable))
- .int JMPTBL(L(Exit2), L(ExitTable))
- .int JMPTBL(L(Exit3), L(ExitTable))
- .int JMPTBL(L(Exit4), L(ExitTable))
- .int JMPTBL(L(Exit5), L(ExitTable))
- .int JMPTBL(L(Exit6), L(ExitTable))
- .int JMPTBL(L(Exit7), L(ExitTable))
- .int JMPTBL(L(Exit8), L(ExitTable))
- .int JMPTBL(L(Exit9), L(ExitTable))
- .int JMPTBL(L(Exit10), L(ExitTable))
- .int JMPTBL(L(Exit11), L(ExitTable))
- .int JMPTBL(L(Exit12), L(ExitTable))
- .int JMPTBL(L(Exit13), L(ExitTable))
- .int JMPTBL(L(Exit14), L(ExitTable))
- .int JMPTBL(L(Exit15), L(ExitTable))
- .int JMPTBL(L(Exit16), L(ExitTable))
- .int JMPTBL(L(Exit17), L(ExitTable))
- .int JMPTBL(L(Exit18), L(ExitTable))
- .int JMPTBL(L(Exit19), L(ExitTable))
- .int JMPTBL(L(Exit20), L(ExitTable))
- .int JMPTBL(L(Exit21), L(ExitTable))
- .int JMPTBL(L(Exit22), L(ExitTable))
- .int JMPTBL(L(Exit23), L(ExitTable))
- .int JMPTBL(L(Exit24), L(ExitTable))
- .int JMPTBL(L(Exit25), L(ExitTable))
- .int JMPTBL(L(Exit26), L(ExitTable))
- .int JMPTBL(L(Exit27), L(ExitTable))
- .int JMPTBL(L(Exit28), L(ExitTable))
- .int JMPTBL(L(Exit29), L(ExitTable))
- .int JMPTBL(L(Exit30), L(ExitTable))
- .int JMPTBL(L(Exit31), L(ExitTable))
- .int JMPTBL(L(Exit32), L(ExitTable))
-
-L(ExitStrncpyTable): /* entry k copies exactly k bytes; entry 0 is L(Exit0) */
- .int JMPTBL(L(Exit0), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
- .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
-
- .p2align 4
-L(FillTable): /* entry k is L(Fill k), for k = 0..16 */
- .int JMPTBL(L(Fill0), L(FillTable))
- .int JMPTBL(L(Fill1), L(FillTable))
- .int JMPTBL(L(Fill2), L(FillTable))
- .int JMPTBL(L(Fill3), L(FillTable))
- .int JMPTBL(L(Fill4), L(FillTable))
- .int JMPTBL(L(Fill5), L(FillTable))
- .int JMPTBL(L(Fill6), L(FillTable))
- .int JMPTBL(L(Fill7), L(FillTable))
- .int JMPTBL(L(Fill8), L(FillTable))
- .int JMPTBL(L(Fill9), L(FillTable))
- .int JMPTBL(L(Fill10), L(FillTable))
- .int JMPTBL(L(Fill11), L(FillTable))
- .int JMPTBL(L(Fill12), L(FillTable))
- .int JMPTBL(L(Fill13), L(FillTable))
- .int JMPTBL(L(Fill14), L(FillTable))
- .int JMPTBL(L(Fill15), L(FillTable))
- .int JMPTBL(L(Fill16), L(FillTable))
-# else
-# define PARMS 4
-# define ENTRANCE
-# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
-# define RETURN1 ret
-
- .text
-ENTRY (STRCPY)
- ENTRANCE
- mov STR1(%esp), %edx
- mov STR2(%esp), %ecx
-
- cmpb $0, (%ecx)
- jz L(ExitTail1)
- cmpb $0, 1(%ecx)
- jz L(ExitTail2)
- cmpb $0, 2(%ecx)
- jz L(ExitTail3)
- cmpb $0, 3(%ecx)
- jz L(ExitTail4)
- cmpb $0, 4(%ecx)
- jz L(ExitTail5)
- cmpb $0, 5(%ecx)
- jz L(ExitTail6)
- cmpb $0, 6(%ecx)
- jz L(ExitTail7)
- cmpb $0, 7(%ecx)
- jz L(ExitTail8)
- cmpb $0, 8(%ecx)
- jz L(ExitTail9)
- cmpb $0, 9(%ecx)
- jz L(ExitTail10)
- cmpb $0, 10(%ecx)
- jz L(ExitTail11)
- cmpb $0, 11(%ecx)
- jz L(ExitTail12)
- cmpb $0, 12(%ecx)
- jz L(ExitTail13)
- cmpb $0, 13(%ecx)
- jz L(ExitTail14)
- cmpb $0, 14(%ecx)
- jz L(ExitTail15)
- cmpb $0, 15(%ecx)
- jz L(ExitTail16)
-
- PUSH (%edi)
- PUSH (%ebx)
-
- mov %edx, %edi
- lea 16(%ecx), %ebx
- and $-16, %ebx
- pxor %xmm0, %xmm0
- movdqu (%ecx), %xmm1
- movdqu %xmm1, (%edx)
- pcmpeqb (%ebx), %xmm0
- pmovmskb %xmm0, %eax
- sub %ecx, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- mov %ecx, %eax
- lea 16(%ecx), %ecx
- and $-16, %ecx
- sub %ecx, %eax
- sub %eax, %edx
- xor %ebx, %ebx
-
- .p2align 4
- movdqa (%ecx), %xmm1
- movaps 16(%ecx), %xmm2
- movdqu %xmm1, (%edx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm3
- movdqu %xmm2, (%edx, %ebx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm4
- movdqu %xmm3, (%edx, %ebx)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm1
- movdqu %xmm4, (%edx, %ebx)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm2
- movdqu %xmm1, (%edx, %ebx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %ebx), %xmm3
- movdqu %xmm2, (%edx, %ebx)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- add $16, %ebx
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movdqu %xmm3, (%edx, %ebx)
- mov %ecx, %eax
- lea 16(%ecx, %ebx), %ecx
- and $-0x40, %ecx
- sub %ecx, %eax
- sub %eax, %edx
-
-L(Aligned64Loop):
- movaps (%ecx), %xmm2
- movaps %xmm2, %xmm4
- movaps 16(%ecx), %xmm5
- movaps 32(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 48(%ecx), %xmm7
- pminub %xmm5, %xmm2
- add $64, %ecx
- pminub %xmm7, %xmm3
- add $64, %edx
- pminub %xmm2, %xmm3
- pcmpeqb %xmm0, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(Aligned64Leave)
-L(Aligned64Loop_start):
- movdqu %xmm4, -64(%edx)
- movaps (%ecx), %xmm2
- movdqa %xmm2, %xmm4
- movdqu %xmm5, -48(%edx)
- movaps 16(%ecx), %xmm5
- pminub %xmm5, %xmm2
- movaps 32(%ecx), %xmm3
- movdqu %xmm6, -32(%edx)
- movaps %xmm3, %xmm6
- movdqu %xmm7, -16(%edx)
- movaps 48(%ecx), %xmm7
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- add $64, %edx
- add $64, %ecx
- test %eax, %eax
- jz L(Aligned64Loop_start)
-L(Aligned64Leave):
- sub $0xa0, %ebx
- pxor %xmm0, %xmm0
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0
- pmovmskb %xmm0, %eax
- movdqu %xmm4, -64(%edx)
- test %eax, %eax
- lea 16(%ebx), %ebx
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %eax
- movdqu %xmm5, -48(%edx)
- test %eax, %eax
- lea 16(%ebx), %ebx
- jnz L(CopyFrom1To16Bytes)
-
- movdqu %xmm6, -32(%edx)
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%ebx), %ebx
-
-/*-----------------End of main part---------------------------*/
-
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %ebx, %edx
- add %ebx, %ecx
-
- POP (%ebx)
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
- /* Exit 8 */
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(ExitHigh):
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
- /* Exit 16 */
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit1):
- movb (%ecx), %al
- movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit2):
- movw (%ecx), %ax
- movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit3):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit5):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit6):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit7):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit9):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movb 8(%ecx), %al
- movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit10):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movw 8(%ecx), %ax
- movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit11):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 7(%ecx), %eax
- movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit12):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit13):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit14):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
- .p2align 4
-L(Exit15):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edi, %eax
-# endif
- RETURN
-
-CFI_POP (%edi)
-
- .p2align 4
-L(ExitTail1):
- movb (%ecx), %al
- movb %al, (%edx)
- movl %edx, %eax
- RETURN1
-
- .p2align 4
-L(ExitTail2):
- movw (%ecx), %ax
- movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail3):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail4):
- movl (%ecx), %eax
- movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail5):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail6):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail7):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail8):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail9):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movb 8(%ecx), %al
- movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail10):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movw 8(%ecx), %ax
- movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail11):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 7(%ecx), %eax
- movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail12):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 4(%ecx), %eax
- movl %eax, 4(%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail13):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail14):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail15):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
- .p2align 4
-L(ExitTail16):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edx, %eax
-# endif
- RETURN1
-
-END (STRCPY)
-# endif
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
deleted file mode 100644
index effd85da94..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
+++ /dev/null
@@ -1,3901 +0,0 @@
-/* strcpy with SSSE3
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-
-#if IS_IN (libc)
-
-# ifndef USE_AS_STRCAT
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# ifndef STRCPY
-# define STRCPY __strcpy_ssse3
-# endif
-
-# ifdef USE_AS_STRNCPY
-# define PARMS 8
-# define ENTRANCE PUSH (%ebx)
-# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
-# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
-# else
-# define PARMS 4
-# define ENTRANCE
-# define RETURN ret
-# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
-# endif
-
-# ifdef USE_AS_STPCPY
-# define SAVE_RESULT(n) lea n(%edx), %eax
-# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
-# else
-# define SAVE_RESULT(n) movl %edi, %eax
-# define SAVE_RESULT_TAIL(n) movl %edx, %eax
-# endif
-
-# define STR1 PARMS
-# define STR2 STR1+4
-# define LEN STR2+4
-
-/* In this code the following instructions are used for copying:
- movb - 1 byte
- movw - 2 bytes
- movl - 4 bytes
- movlpd - 8 bytes
- movaps - 16 bytes - requires 16-byte alignment
- of source and destination addresses.
-*/
-
-.text
-ENTRY (STRCPY)
- ENTRANCE
- mov STR1(%esp), %edx
- mov STR2(%esp), %ecx
-# ifdef USE_AS_STRNCPY
- movl LEN(%esp), %ebx
- cmp $8, %ebx
- jbe L(StrncpyExit8Bytes)
-# endif
- cmpb $0, (%ecx)
- jz L(ExitTail1)
- cmpb $0, 1(%ecx)
- jz L(ExitTail2)
- cmpb $0, 2(%ecx)
- jz L(ExitTail3)
- cmpb $0, 3(%ecx)
- jz L(ExitTail4)
- cmpb $0, 4(%ecx)
- jz L(ExitTail5)
- cmpb $0, 5(%ecx)
- jz L(ExitTail6)
- cmpb $0, 6(%ecx)
- jz L(ExitTail7)
- cmpb $0, 7(%ecx)
- jz L(ExitTail8)
-# ifdef USE_AS_STRNCPY
- cmp $16, %ebx
- jb L(StrncpyExit15Bytes)
-# endif
- cmpb $0, 8(%ecx)
- jz L(ExitTail9)
- cmpb $0, 9(%ecx)
- jz L(ExitTail10)
- cmpb $0, 10(%ecx)
- jz L(ExitTail11)
- cmpb $0, 11(%ecx)
- jz L(ExitTail12)
- cmpb $0, 12(%ecx)
- jz L(ExitTail13)
- cmpb $0, 13(%ecx)
- jz L(ExitTail14)
- cmpb $0, 14(%ecx)
- jz L(ExitTail15)
-# ifdef USE_AS_STRNCPY
- cmp $16, %ebx
- je L(ExitTail16)
-# endif
- cmpb $0, 15(%ecx)
- jz L(ExitTail16)
-
- PUSH (%edi)
- mov %edx, %edi
-# endif
- PUSH (%esi)
-# ifdef USE_AS_STRNCPY
- mov %ecx, %esi
- sub $16, %ebx
- and $0xf, %esi
-
-/* add 16 bytes ecx_offset to ebx */
-
- add %esi, %ebx
-# endif
- lea 16(%ecx), %esi
- and $-16, %esi
- pxor %xmm0, %xmm0
- movlpd (%ecx), %xmm1
- movlpd %xmm1, (%edx)
-
- pcmpeqb (%esi), %xmm0
- movlpd 8(%ecx), %xmm1
- movlpd %xmm1, 8(%edx)
-
- pmovmskb %xmm0, %eax
- sub %ecx, %esi
-
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- mov %edx, %eax
- lea 16(%edx), %edx
- and $-16, %edx
- sub %edx, %eax
-
-# ifdef USE_AS_STRNCPY
- add %eax, %esi
- lea -1(%esi), %esi
- and $1<<31, %esi
- test %esi, %esi
- jnz L(ContinueCopy)
- lea 16(%ebx), %ebx
-
-L(ContinueCopy):
-# endif
- sub %eax, %ecx
- mov %ecx, %eax
- and $0xf, %eax
- mov $0, %esi
-
-/* case: ecx_offset == edx_offset */
-
- jz L(Align16Both)
-
- cmp $8, %eax
- jae L(ShlHigh8)
- cmp $1, %eax
- je L(Shl1)
- cmp $2, %eax
- je L(Shl2)
- cmp $3, %eax
- je L(Shl3)
- cmp $4, %eax
- je L(Shl4)
- cmp $5, %eax
- je L(Shl5)
- cmp $6, %eax
- je L(Shl6)
- jmp L(Shl7)
-
-L(ShlHigh8):
- je L(Shl8)
- cmp $9, %eax
- je L(Shl9)
- cmp $10, %eax
- je L(Shl10)
- cmp $11, %eax
- je L(Shl11)
- cmp $12, %eax
- je L(Shl12)
- cmp $13, %eax
- je L(Shl13)
- cmp $14, %eax
- je L(Shl14)
- jmp L(Shl15)
-
-L(Align16Both):
- movaps (%ecx), %xmm1
- movaps 16(%ecx), %xmm2
- movaps %xmm1, (%edx)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm3
- movaps %xmm2, (%edx, %esi)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm4
- movaps %xmm3, (%edx, %esi)
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm1
- movaps %xmm4, (%edx, %esi)
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm2
- movaps %xmm1, (%edx, %esi)
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm3
- movaps %xmm2, (%edx, %esi)
- pcmpeqb %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps %xmm3, (%edx, %esi)
- mov %ecx, %eax
- lea 16(%ecx, %esi), %ecx
- and $-0x40, %ecx
- sub %ecx, %eax
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- lea 112(%ebx, %eax), %ebx
-# endif
- mov $-0x40, %esi
-
-L(Aligned64Loop):
- movaps (%ecx), %xmm2
- movaps 32(%ecx), %xmm3
- movaps %xmm2, %xmm4
- movaps 16(%ecx), %xmm5
- movaps %xmm3, %xmm6
- movaps 48(%ecx), %xmm7
- pminub %xmm5, %xmm2
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- lea 64(%edx), %edx
- pcmpeqb %xmm0, %xmm3
- lea 64(%ecx), %ecx
- pmovmskb %xmm3, %eax
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeaveCase2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Aligned64Leave)
- movaps %xmm4, -64(%edx)
- movaps %xmm5, -48(%edx)
- movaps %xmm6, -32(%edx)
- movaps %xmm7, -16(%edx)
- jmp L(Aligned64Loop)
-
-L(Aligned64Leave):
-# ifdef USE_AS_STRNCPY
- lea 48(%ebx), %ebx
-# endif
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0
-# ifdef USE_AS_STRNCPY
- lea -16(%ebx), %ebx
-# endif
- pmovmskb %xmm0, %eax
- movaps %xmm4, -64(%edx)
- test %eax, %eax
- lea 16(%esi), %esi
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
-# ifdef USE_AS_STRNCPY
- lea -16(%ebx), %ebx
-# endif
- pmovmskb %xmm0, %eax
- movaps %xmm5, -48(%edx)
- test %eax, %eax
- lea 16(%esi), %esi
- jnz L(CopyFrom1To16Bytes)
-
- movaps %xmm6, -32(%edx)
- pcmpeqb %xmm7, %xmm0
-# ifdef USE_AS_STRNCPY
- lea -16(%ebx), %ebx
-# endif
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl1):
- movaps -1(%ecx), %xmm1
- movaps 15(%ecx), %xmm2
-L(Shl1Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 31(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 31(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 31(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit1Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl1LoopExit)
-
- palignr $1, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 31(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -15(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -1(%ecx), %xmm1
-
-L(Shl1LoopStart):
- movaps 15(%ecx), %xmm2
- movaps 31(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 47(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 63(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $1, %xmm4, %xmm5
- test %eax, %eax
- palignr $1, %xmm3, %xmm4
- jnz L(Shl1Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave1)
-# endif
- palignr $1, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $1, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl1LoopStart)
-
-L(Shl1LoopExit):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
- mov $15, %esi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl2):
- movaps -2(%ecx), %xmm1
- movaps 14(%ecx), %xmm2
-L(Shl2Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 30(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 30(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 30(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit2Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl2LoopExit)
-
- palignr $2, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 30(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -14(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -2(%ecx), %xmm1
-
-L(Shl2LoopStart):
- movaps 14(%ecx), %xmm2
- movaps 30(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 46(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 62(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $2, %xmm4, %xmm5
- test %eax, %eax
- palignr $2, %xmm3, %xmm4
- jnz L(Shl2Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave2)
-# endif
- palignr $2, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $2, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl2LoopStart)
-
-L(Shl2LoopExit):
- movlpd (%ecx), %xmm0
- movlpd 6(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 6(%edx)
- mov $14, %esi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl3):
- movaps -3(%ecx), %xmm1
- movaps 13(%ecx), %xmm2
-L(Shl3Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 29(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 29(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 29(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit3Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl3LoopExit)
-
- palignr $3, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 29(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -13(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -3(%ecx), %xmm1
-
-L(Shl3LoopStart):
- movaps 13(%ecx), %xmm2
- movaps 29(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 45(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 61(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $3, %xmm4, %xmm5
- test %eax, %eax
- palignr $3, %xmm3, %xmm4
- jnz L(Shl3Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave3)
-# endif
- palignr $3, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $3, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl3LoopStart)
-
-L(Shl3LoopExit):
- movlpd (%ecx), %xmm0
- movlpd 5(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 5(%edx)
- mov $13, %esi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl4):
- movaps -4(%ecx), %xmm1
- movaps 12(%ecx), %xmm2
-L(Shl4Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 28(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 28(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 28(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit4Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 28(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -12(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -4(%ecx), %xmm1
-
-L(Shl4LoopStart):
- movaps 12(%ecx), %xmm2
- movaps 28(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 44(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 60(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $4, %xmm4, %xmm5
- test %eax, %eax
- palignr $4, %xmm3, %xmm4
- jnz L(Shl4Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave4)
-# endif
- palignr $4, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $4, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl4LoopStart)
-
-L(Shl4LoopExit):
- movlpd (%ecx), %xmm0
- movl 8(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 8(%edx)
- mov $12, %esi
- jmp L(CopyFrom1To16Bytes)
-
- .p2align 4
-L(Shl5):
- movaps -5(%ecx), %xmm1
- movaps 11(%ecx), %xmm2
-L(Shl5Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 27(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 27(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 27(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit5Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl5LoopExit)
-
- palignr $5, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 27(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -11(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -5(%ecx), %xmm1
-
-L(Shl5LoopStart):
- movaps 11(%ecx), %xmm2
- movaps 27(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 43(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 59(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $5, %xmm4, %xmm5
- test %eax, %eax
- palignr $5, %xmm3, %xmm4
- jnz L(Shl5Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave5)
-# endif
- palignr $5, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $5, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl5LoopStart)
-
-L(Shl5LoopExit):
- movlpd (%ecx), %xmm0
- movl 7(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 7(%edx)
- mov $11, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl6): SSSE3 copy path used when %ecx is 6 bytes past a 16-byte
-   boundary (the movaps loads at -6(%ecx) and 10(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $6 to splice it with the previous chunk into one
-   aligned 16-byte store at (%edx).  In STRNCPY builds every step first
-   takes 16 off %ebx and leaves through L(StrncpyExit6Case2OrCase3) once
-   that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl6):
- movaps -6(%ecx), %xmm1
- movaps 10(%ecx), %xmm2
-L(Shl6Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl6LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 26(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl6LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 26(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl6LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 26(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit6Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl6LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $6, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 26(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -10(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -6(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl6Start) and redo the block chunk by
-   chunk.  */
-L(Shl6LoopStart):
- movaps 10(%ecx), %xmm2
- movaps 26(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 42(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 58(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $6, %xmm4, %xmm5
- test %eax, %eax
- palignr $6, %xmm3, %xmm4
- jnz L(Shl6Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave6)
-# endif
- palignr $6, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $6, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl6LoopStart)
-/* Match in the chunk at 10(%ecx): copy the 10 bytes in front of it (an
-   8-byte and an overlapping 4-byte move), then let L(CopyFrom1To16Bytes)
-   advance both pointers by %esi = 10 and finish from the mask in %eax.  */
-L(Shl6LoopExit):
- movlpd (%ecx), %xmm0
- movl 6(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 6(%edx)
- mov $10, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl7): SSSE3 copy path used when %ecx is 7 bytes past a 16-byte
-   boundary (the movaps loads at -7(%ecx) and 9(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $7 to splice it with the previous chunk into one
-   aligned 16-byte store at (%edx).  In STRNCPY builds every step first
-   takes 16 off %ebx and leaves through L(StrncpyExit7Case2OrCase3) once
-   that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl7):
- movaps -7(%ecx), %xmm1
- movaps 9(%ecx), %xmm2
-L(Shl7Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl7LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 25(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl7LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 25(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl7LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 25(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit7Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl7LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $7, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 25(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -9(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -7(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl7Start) and redo the block chunk by
-   chunk.  */
-L(Shl7LoopStart):
- movaps 9(%ecx), %xmm2
- movaps 25(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 41(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 57(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $7, %xmm4, %xmm5
- test %eax, %eax
- palignr $7, %xmm3, %xmm4
- jnz L(Shl7Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave7)
-# endif
- palignr $7, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $7, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl7LoopStart)
-/* Match in the chunk at 9(%ecx): copy the 9 bytes in front of it (an
-   8-byte and an overlapping 4-byte move), then let L(CopyFrom1To16Bytes)
-   advance both pointers by %esi = 9 and finish from the mask in %eax.  */
-L(Shl7LoopExit):
- movlpd (%ecx), %xmm0
- movl 5(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 5(%edx)
- mov $9, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl8): SSSE3 copy path used when %ecx is 8 bytes past a 16-byte
-   boundary (the movaps loads at -8(%ecx) and 8(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $8 to splice it with the previous chunk into one
-   aligned 16-byte store at (%edx).  In STRNCPY builds every step first
-   takes 16 off %ebx and leaves through L(StrncpyExit8Case2OrCase3) once
-   that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl8):
- movaps -8(%ecx), %xmm1
- movaps 8(%ecx), %xmm2
-L(Shl8Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl8LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 24(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl8LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 24(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl8LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 24(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit8Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl8LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $8, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 24(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -8(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -8(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl8Start) and redo the block chunk by
-   chunk.  */
-L(Shl8LoopStart):
- movaps 8(%ecx), %xmm2
- movaps 24(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 40(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 56(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $8, %xmm4, %xmm5
- test %eax, %eax
- palignr $8, %xmm3, %xmm4
- jnz L(Shl8Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave8)
-# endif
- palignr $8, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $8, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl8LoopStart)
-/* Match in the chunk at 8(%ecx): copy the 8 bytes in front of it with one
-   8-byte move, then let L(CopyFrom1To16Bytes) advance both pointers by
-   %esi = 8 and finish from the mask in %eax.  */
-L(Shl8LoopExit):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- mov $8, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl9): SSSE3 copy path used when %ecx is 9 bytes past a 16-byte
-   boundary (the movaps loads at -9(%ecx) and 7(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $9 to splice it with the previous chunk into one
-   aligned 16-byte store at (%edx).  In STRNCPY builds every step first
-   takes 16 off %ebx and leaves through L(StrncpyExit9Case2OrCase3) once
-   that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl9):
- movaps -9(%ecx), %xmm1
- movaps 7(%ecx), %xmm2
-L(Shl9Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl9LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 23(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl9LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $9, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 23(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl9LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $9, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 23(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit9Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl9LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $9, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 23(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -7(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -9(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl9Start) and redo the block chunk by
-   chunk.  */
-L(Shl9LoopStart):
- movaps 7(%ecx), %xmm2
- movaps 23(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 39(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 55(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $9, %xmm4, %xmm5
- test %eax, %eax
- palignr $9, %xmm3, %xmm4
- jnz L(Shl9Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave9)
-# endif
- palignr $9, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $9, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl9LoopStart)
-/* Match in the chunk at 7(%ecx): one 8-byte move starting 1 byte below
-   %ecx/%edx covers the 7 bytes in front of it, then
-   L(CopyFrom1To16Bytes) advances both pointers by %esi = 7 and finishes
-   from the mask in %eax.  */
-L(Shl9LoopExit):
- movlpd -1(%ecx), %xmm0
- movlpd %xmm0, -1(%edx)
- mov $7, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl10): SSSE3 copy path used when %ecx is 10 bytes past a 16-byte
-   boundary (the movaps loads at -10(%ecx) and 6(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $10 to splice it with the previous chunk into
-   one aligned 16-byte store at (%edx).  In STRNCPY builds every step
-   first takes 16 off %ebx and leaves through
-   L(StrncpyExit10Case2OrCase3) once that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl10):
- movaps -10(%ecx), %xmm1
- movaps 6(%ecx), %xmm2
-L(Shl10Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl10LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 22(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl10LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $10, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 22(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl10LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $10, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 22(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit10Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl10LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $10, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 22(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -6(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -10(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl10Start) and redo the block chunk
-   by chunk.  */
-L(Shl10LoopStart):
- movaps 6(%ecx), %xmm2
- movaps 22(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 38(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 54(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $10, %xmm4, %xmm5
- test %eax, %eax
- palignr $10, %xmm3, %xmm4
- jnz L(Shl10Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave10)
-# endif
- palignr $10, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $10, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl10LoopStart)
-/* Match in the chunk at 6(%ecx): one 8-byte move starting 2 bytes below
-   %ecx/%edx covers the 6 bytes in front of it, then
-   L(CopyFrom1To16Bytes) advances both pointers by %esi = 6 and finishes
-   from the mask in %eax.  */
-L(Shl10LoopExit):
- movlpd -2(%ecx), %xmm0
- movlpd %xmm0, -2(%edx)
- mov $6, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl11): SSSE3 copy path used when %ecx is 11 bytes past a 16-byte
-   boundary (the movaps loads at -11(%ecx) and 5(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $11 to splice it with the previous chunk into
-   one aligned 16-byte store at (%edx).  In STRNCPY builds every step
-   first takes 16 off %ebx and leaves through
-   L(StrncpyExit11Case2OrCase3) once that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl11):
- movaps -11(%ecx), %xmm1
- movaps 5(%ecx), %xmm2
-L(Shl11Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl11LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 21(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl11LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $11, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 21(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl11LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $11, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 21(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit11Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl11LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $11, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 21(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -5(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -11(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl11Start) and redo the block chunk
-   by chunk.  */
-L(Shl11LoopStart):
- movaps 5(%ecx), %xmm2
- movaps 21(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 37(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 53(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $11, %xmm4, %xmm5
- test %eax, %eax
- palignr $11, %xmm3, %xmm4
- jnz L(Shl11Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave11)
-# endif
- palignr $11, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $11, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl11LoopStart)
-/* Match in the chunk at 5(%ecx): one 8-byte move starting 3 bytes below
-   %ecx/%edx covers the 5 bytes in front of it, then
-   L(CopyFrom1To16Bytes) advances both pointers by %esi = 5 and finishes
-   from the mask in %eax.  */
-L(Shl11LoopExit):
- movlpd -3(%ecx), %xmm0
- movlpd %xmm0, -3(%edx)
- mov $5, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl12): SSSE3 copy path used when %ecx is 12 bytes past a 16-byte
-   boundary (the movaps loads at -12(%ecx) and 4(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $12 to splice it with the previous chunk into
-   one aligned 16-byte store at (%edx).  In STRNCPY builds every step
-   first takes 16 off %ebx and leaves through
-   L(StrncpyExit12Case2OrCase3) once that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl12):
- movaps -12(%ecx), %xmm1
- movaps 4(%ecx), %xmm2
-L(Shl12Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl12LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 20(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl12LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 20(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl12LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 20(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit12Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl12LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $12, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 20(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -4(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -12(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl12Start) and redo the block chunk
-   by chunk.  */
-L(Shl12LoopStart):
- movaps 4(%ecx), %xmm2
- movaps 20(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 36(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 52(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $12, %xmm4, %xmm5
- test %eax, %eax
- palignr $12, %xmm3, %xmm4
- jnz L(Shl12Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave12)
-# endif
- palignr $12, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $12, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl12LoopStart)
-/* Match in the chunk at 4(%ecx): copy the 4 bytes in front of it with one
-   4-byte move, then let L(CopyFrom1To16Bytes) advance both pointers by
-   %esi = 4 and finish from the mask in %eax.  */
-L(Shl12LoopExit):
- movl (%ecx), %esi
- movl %esi, (%edx)
- mov $4, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl13): SSSE3 copy path used when %ecx is 13 bytes past a 16-byte
-   boundary (the movaps loads at -13(%ecx) and 3(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $13 to splice it with the previous chunk into
-   one aligned 16-byte store at (%edx).  In STRNCPY builds every step
-   first takes 16 off %ebx and leaves through
-   L(StrncpyExit13Case2OrCase3) once that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl13):
- movaps -13(%ecx), %xmm1
- movaps 3(%ecx), %xmm2
-L(Shl13Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl13LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 19(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl13LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $13, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 19(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl13LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $13, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 19(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit13Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl13LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $13, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 19(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -3(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -13(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl13Start) and redo the block chunk
-   by chunk.  */
-L(Shl13LoopStart):
- movaps 3(%ecx), %xmm2
- movaps 19(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 35(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 51(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $13, %xmm4, %xmm5
- test %eax, %eax
- palignr $13, %xmm3, %xmm4
- jnz L(Shl13Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave13)
-# endif
- palignr $13, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $13, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl13LoopStart)
-/* Match in the chunk at 3(%ecx): one 4-byte move starting 1 byte below
-   %ecx/%edx covers the 3 bytes in front of it, then
-   L(CopyFrom1To16Bytes) advances both pointers by %esi = 3 and finishes
-   from the mask in %eax.  */
-L(Shl13LoopExit):
- movl -1(%ecx), %esi
- movl %esi, -1(%edx)
- mov $3, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl14): SSSE3 copy path used when %ecx is 14 bytes past a 16-byte
-   boundary (the movaps loads at -14(%ecx) and 2(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $14 to splice it with the previous chunk into
-   one aligned 16-byte store at (%edx).  In STRNCPY builds every step
-   first takes 16 off %ebx and leaves through
-   L(StrncpyExit14Case2OrCase3) once that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl14):
- movaps -14(%ecx), %xmm1
- movaps 2(%ecx), %xmm2
-L(Shl14Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl14LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 18(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl14LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $14, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 18(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl14LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $14, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 18(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit14Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl14LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $14, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 18(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -2(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -14(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl14Start) and redo the block chunk
-   by chunk.  */
-L(Shl14LoopStart):
- movaps 2(%ecx), %xmm2
- movaps 18(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 34(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 50(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $14, %xmm4, %xmm5
- test %eax, %eax
- palignr $14, %xmm3, %xmm4
- jnz L(Shl14Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave14)
-# endif
- palignr $14, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $14, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl14LoopStart)
-/* Match in the chunk at 2(%ecx): one 4-byte move starting 2 bytes below
-   %ecx/%edx covers the 2 bytes in front of it, then
-   L(CopyFrom1To16Bytes) advances both pointers by %esi = 2 and finishes
-   from the mask in %eax.  */
-L(Shl14LoopExit):
- movl -2(%ecx), %esi
- movl %esi, -2(%edx)
- mov $2, %esi
- jmp L(CopyFrom1To16Bytes)
-/* L(Shl15): SSSE3 copy path used when %ecx is 15 bytes past a 16-byte
-   boundary (the movaps loads at -15(%ecx) and 1(%ecx) need aligned
-   addresses).  Each step tests the newest aligned source chunk against
-   %xmm0 and uses palignr $15 to splice it with the previous chunk into
-   one aligned 16-byte store at (%edx).  In STRNCPY builds every step
-   first takes 16 off %ebx and leaves through
-   L(StrncpyExit15Case2OrCase3) once that reaches zero or borrows.
-   NOTE(review): %xmm0 is presumably all-zero on entry and %ebx the
-   remaining strncpy count; both are set up outside this chunk.  */
- .p2align 4
-L(Shl15):
- movaps -15(%ecx), %xmm1
- movaps 1(%ecx), %xmm2
-L(Shl15Start):
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl15LoopExit)
-/* No match: store 16 spliced bytes, keep this chunk as the new "previous"
-   in %xmm1 and fetch the next chunk.  */
- palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
- movaps %xmm2, (%edx)
- movaps 17(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl15LoopExit)
-/* Second step: same splice, store and fetch.  */
- palignr $15, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 17(%ecx), %xmm2
- movaps %xmm3, %xmm1
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl15LoopExit)
-/* Third step; %xmm1 is not refreshed afterwards because the last step
-   splices against the copy kept in %xmm3.  */
- palignr $15, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 17(%ecx), %xmm2
-
- pcmpeqb %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- jbe L(StrncpyExit15Case2OrCase3)
-# endif
- test %eax, %eax
- jnz L(Shl15LoopExit)
-/* Fourth step: splice with the chunk saved in %xmm3 and store.  */
- palignr $15, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 17(%ecx), %ecx
- lea 16(%edx), %edx
-/* Round the next source chunk address down to a 64-byte boundary, move
-   %edx back by the same distance (adding it back to %ebx for strncpy) and
-   reload the chunk in front of it into %xmm1.  */
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -1(%ecx), %ecx
- sub %eax, %edx
-# ifdef USE_AS_STRNCPY
- add %eax, %ebx
-# endif
- movaps -15(%ecx), %xmm1
-/* Main loop, 64 source bytes per pass.  pminub folds the four chunks so a
-   single pcmpeqb/pmovmskb reports a zero byte in any of them (given %xmm0
-   is zero); on a hit, go back to L(Shl15Start) and redo the block chunk
-   by chunk.  */
-L(Shl15LoopStart):
- movaps 1(%ecx), %xmm2
- movaps 17(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 33(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 49(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqb %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $15, %xmm4, %xmm5
- test %eax, %eax
- palignr $15, %xmm3, %xmm4
- jnz L(Shl15Start)
-# ifdef USE_AS_STRNCPY
- sub $64, %ebx
- jbe L(StrncpyLeave15)
-# endif
- palignr $15, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $15, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl15LoopStart)
-/* Match in the chunk at 1(%ecx): one 4-byte move starting 3 bytes below
-   %ecx/%edx covers the 1 byte in front of it; %esi = 1 is the advance for
-   L(CopyFrom1To16Bytes).  The jmp is only emitted for USE_AS_STRCAT; in
-   the other builds control falls through into the L(CopyFrom1To16Bytes)
-   that follows.  */
-L(Shl15LoopExit):
- movl -3(%ecx), %esi
- movl %esi, -3(%edx)
- mov $1, %esi
-# ifdef USE_AS_STRCAT
- jmp L(CopyFrom1To16Bytes)
-# endif
-
-
-# ifndef USE_AS_STRCAT
-/* L(CopyFrom1To16Bytes): copy the last 1..16 bytes.  Advances %edx and
-   %ecx by %esi, restores %esi with POP, then dispatches on the lowest set
-   bit of the match mask in %ax: bit n-1 selects L(Exitn).  STRNCPY builds
-   first add 16 back to %ebx.
-   NOTE(review): POP, SAVE_RESULT and RETURN1 are macros defined outside
-   this chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(CopyFrom1To16Bytes):
-# ifdef USE_AS_STRNCPY
- add $16, %ebx
-# endif
- add %esi, %edx
- add %esi, %ecx
-
- POP (%esi)
- test %al, %al
- jz L(ExitHigh8)
-/* Mask bit is in %al; %ah is free to hold the low nibble for the test.  */
-L(CopyFrom1To16BytesLess8):
- mov %al, %ah
- and $15, %ah
- jz L(ExitHigh4)
-
- test $0x01, %al
- jnz L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
-/* L(Exit4): copy 4 bytes.  STRNCPY: take 4 off %ebx and zero-fill from
-   4(%edx) if a count remains (lea keeps the flags of the sub).  STPCPY:
-   cmpb/sbb add 1 to %eax unless the byte at (%eax) is zero.  */
- .p2align 4
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
- SAVE_RESULT (3)
-# ifdef USE_AS_STRNCPY
- sub $4, %ebx
- lea 4(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* Lowest set bit is in bits 4..7 of %al: pick Exit5..Exit8.  */
- .p2align 4
-L(ExitHigh4):
- test $0x10, %al
- jnz L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
-/* L(Exit8): copy 8 bytes; same STRNCPY/STPCPY tail as L(Exit4).  */
- .p2align 4
-L(Exit8):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- SAVE_RESULT (7)
-# ifdef USE_AS_STRNCPY
- sub $8, %ebx
- lea 8(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* %al is zero, so the lowest set bit is in %ah; %al becomes scratch.  */
- .p2align 4
-L(ExitHigh8):
- mov %ah, %al
- and $15, %al
- jz L(ExitHigh12)
-
- test $0x01, %ah
- jnz L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
-/* L(Exit12): copy 12 bytes as an 8-byte plus a 4-byte move.  */
- .p2align 4
-L(Exit12):
- movlpd (%ecx), %xmm0
- movl 8(%ecx), %eax
- movlpd %xmm0, (%edx)
- movl %eax, 8(%edx)
- SAVE_RESULT (11)
-# ifdef USE_AS_STRNCPY
- sub $12, %ebx
- lea 12(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* Lowest set bit is in bits 4..7 of %ah: pick Exit13..Exit16.  */
- .p2align 4
-L(ExitHigh12):
- test $0x10, %ah
- jnz L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
-/* L(Exit16): copy 16 bytes with unaligned moves.  */
- .p2align 4
-L(Exit16):
- movdqu (%ecx), %xmm0
- movdqu %xmm0, (%edx)
- SAVE_RESULT (15)
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- lea 16(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* STRNCPY-only tail dispatch for when the count in %ebx runs out within
-   the current 16 bytes.  "Case2" is entered with a nonzero match mask in
-   %eax, "Case3" with an empty one (see the test/jnz at
-   L(CopyFrom1To16BytesCase2OrCase3)).  */
-# ifdef USE_AS_STRNCPY
-
- CFI_PUSH(%esi)
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %ebx
- add %esi, %ecx
- add %esi, %edx
-
- POP (%esi)
-
- test %al, %al
- jz L(ExitHighCase2)
-/* Match in the low 8 bytes.  With more than 8 bytes of count left the
-   count cannot cut the copy short, so use the mask-only dispatch.  */
- cmp $8, %ebx
- ja L(CopyFrom1To16BytesLess8)
-/* Otherwise walk n = 1..7 and stop at the first n that is either flagged
-   in the mask or equal to the remaining count.  */
- test $0x01, %al
- jnz L(Exit1)
- cmp $1, %ebx
- je L(Exit1)
- test $0x02, %al
- jnz L(Exit2)
- cmp $2, %ebx
- je L(Exit2)
- test $0x04, %al
- jnz L(Exit3)
- cmp $3, %ebx
- je L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
- cmp $4, %ebx
- je L(Exit4)
- test $0x10, %al
- jnz L(Exit5)
- cmp $5, %ebx
- je L(Exit5)
- test $0x20, %al
- jnz L(Exit6)
- cmp $6, %ebx
- je L(Exit6)
- test $0x40, %al
- jnz L(Exit7)
- cmp $7, %ebx
- je L(Exit7)
- jmp L(Exit8)
-/* No match in the low 8 bytes.  If at most 8 bytes of count remain the
-   count decides alone; otherwise walk n = 9..15 as above using %ah.  */
- .p2align 4
-L(ExitHighCase2):
- cmp $8, %ebx
- jbe L(CopyFrom1To16BytesLess8Case3)
-
- test $0x01, %ah
- jnz L(Exit9)
- cmp $9, %ebx
- je L(Exit9)
- test $0x02, %ah
- jnz L(Exit10)
- cmp $10, %ebx
- je L(Exit10)
- test $0x04, %ah
- jnz L(Exit11)
- cmp $11, %ebx
- je L(Exit11)
- test $0x8, %ah
- jnz L(Exit12)
- cmp $12, %ebx
- je L(Exit12)
- test $0x10, %ah
- jnz L(Exit13)
- cmp $13, %ebx
- je L(Exit13)
- test $0x20, %ah
- jnz L(Exit14)
- cmp $14, %ebx
- je L(Exit14)
- test $0x40, %ah
- jnz L(Exit15)
- cmp $15, %ebx
- je L(Exit15)
- jmp L(Exit16)
-
- CFI_PUSH(%esi)
-/* Entry that chooses between the two cases from the mask in %eax.  */
- .p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
-/* Case3: no match; the number of bytes copied depends on %ebx only.  */
- .p2align 4
-L(CopyFrom1To16BytesCase3):
- add $16, %ebx
- add %esi, %edx
- add %esi, %ecx
-
- POP (%esi)
-
- cmp $8, %ebx
- ja L(ExitHigh8Case3)
-
-L(CopyFrom1To16BytesLess8Case3):
- cmp $4, %ebx
- ja L(ExitHigh4Case3)
-/* Counts 1..3 reuse the shared exits; otherwise copy 4 bytes here.  */
- cmp $1, %ebx
- je L(Exit1)
- cmp $2, %ebx
- je L(Exit2)
- cmp $3, %ebx
- je L(Exit3)
- movl (%ecx), %eax
- movl %eax, (%edx)
- SAVE_RESULT (4)
- RETURN1
-/* Counts 5..7 reuse the shared exits; otherwise copy 8 bytes here.  */
- .p2align 4
-L(ExitHigh4Case3):
- cmp $5, %ebx
- je L(Exit5)
- cmp $6, %ebx
- je L(Exit6)
- cmp $7, %ebx
- je L(Exit7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- SAVE_RESULT (8)
- RETURN1
-/* Counts 9..11 reuse the shared exits; otherwise copy 12 bytes here.  */
- .p2align 4
-L(ExitHigh8Case3):
- cmp $12, %ebx
- ja L(ExitHigh12Case3)
-
- cmp $9, %ebx
- je L(Exit9)
- cmp $10, %ebx
- je L(Exit10)
- cmp $11, %ebx
- je L(Exit11)
- movlpd (%ecx), %xmm0
- movl 8(%ecx), %eax
- movlpd %xmm0, (%edx)
- movl %eax, 8(%edx)
- SAVE_RESULT (12)
- RETURN1
-/* Counts 13..15 reuse the shared exits; otherwise copy 16 bytes here.  */
- .p2align 4
-L(ExitHigh12Case3):
- cmp $13, %ebx
- je L(Exit13)
- cmp $14, %ebx
- je L(Exit14)
- cmp $15, %ebx
- je L(Exit15)
- movlpd (%ecx), %xmm0
- movlpd 8(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 8(%edx)
- SAVE_RESULT (16)
- RETURN1
-
-# endif
-/* L(Exit1): copy 1 byte from (%ecx) to (%edx).
-   STRNCPY: take 1 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 1(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit1):
- movb (%ecx), %al
- movb %al, (%edx)
- SAVE_RESULT (0)
-# ifdef USE_AS_STRNCPY
- sub $1, %ebx
- lea 1(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit2): copy 2 bytes from (%ecx) to (%edx).
-   STRNCPY: take 2 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 2(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit2):
- movw (%ecx), %ax
- movw %ax, (%edx)
- SAVE_RESULT (1)
-# ifdef USE_AS_STRNCPY
- sub $2, %ebx
- lea 2(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit3): copy 3 bytes from (%ecx) to (%edx) as a 2-byte plus a 1-byte
-   move.
-   STRNCPY: take 3 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 3(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit3):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
- SAVE_RESULT (2)
-# ifdef USE_AS_STRNCPY
- sub $3, %ebx
- lea 3(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit5): copy 5 bytes from (%ecx) to (%edx) as a 4-byte plus a 1-byte
-   move.
-   STRNCPY: take 5 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 5(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit5):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
- SAVE_RESULT (4)
-# ifdef USE_AS_STRNCPY
- sub $5, %ebx
- lea 5(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit6): copy 6 bytes from (%ecx) to (%edx) as a 4-byte plus a 2-byte
-   move.
-   STRNCPY: take 6 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 6(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit6):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
- SAVE_RESULT (5)
-# ifdef USE_AS_STRNCPY
- sub $6, %ebx
- lea 6(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit7): copy 7 bytes from (%ecx) to (%edx) with two 4-byte moves that
-   overlap by one byte (offsets 0 and 3).
-   STRNCPY: take 7 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 7(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit7):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
- SAVE_RESULT (6)
-# ifdef USE_AS_STRNCPY
- sub $7, %ebx
- lea 7(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit9): copy 9 bytes from (%ecx) to (%edx) as an 8-byte plus a 1-byte
-   move.
-   STRNCPY: take 9 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 9(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit9):
- movlpd (%ecx), %xmm0
- movb 8(%ecx), %al
- movlpd %xmm0, (%edx)
- movb %al, 8(%edx)
- SAVE_RESULT (8)
-# ifdef USE_AS_STRNCPY
- sub $9, %ebx
- lea 9(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit10): copy 10 bytes from (%ecx) to (%edx) as an 8-byte plus a
-   2-byte move.
-   STRNCPY: take 10 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 10(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit10):
- movlpd (%ecx), %xmm0
- movw 8(%ecx), %ax
- movlpd %xmm0, (%edx)
- movw %ax, 8(%edx)
- SAVE_RESULT (9)
-# ifdef USE_AS_STRNCPY
- sub $10, %ebx
- lea 10(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit11): copy 11 bytes from (%ecx) to (%edx) with an 8-byte move and a
-   4-byte move at offset 7 (one byte of overlap).
-   STRNCPY: take 11 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 11(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit11):
- movlpd (%ecx), %xmm0
- movl 7(%ecx), %eax
- movlpd %xmm0, (%edx)
- movl %eax, 7(%edx)
- SAVE_RESULT (10)
-# ifdef USE_AS_STRNCPY
- sub $11, %ebx
- lea 11(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit13): copy 13 bytes from (%ecx) to (%edx) with two overlapping
-   8-byte moves (offsets 0 and 5).
-   STRNCPY: take 13 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 13(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit13):
- movlpd (%ecx), %xmm0
- movlpd 5(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 5(%edx)
- SAVE_RESULT (12)
-# ifdef USE_AS_STRNCPY
- sub $13, %ebx
- lea 13(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit14): copy 14 bytes from (%ecx) to (%edx) with two overlapping
-   8-byte moves (offsets 0 and 6).
-   STRNCPY: take 14 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 14(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit14):
- movlpd (%ecx), %xmm0
- movlpd 6(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 6(%edx)
- SAVE_RESULT (13)
-# ifdef USE_AS_STRNCPY
- sub $14, %ebx
- lea 14(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-/* L(Exit15): copy 15 bytes from (%ecx) to (%edx) with two overlapping
-   8-byte moves (offsets 0 and 7).
-   STRNCPY: take 15 off %ebx and, if a count remains, continue at
-   L(StrncpyFillTailWithZero1) with %ecx = 15(%edx); lea leaves the flags
-   alone, so jnz still tests the sub.  STPCPY: cmpb/sbb add 1 to %eax
-   unless the byte at (%eax) is zero.
-   NOTE(review): SAVE_RESULT and RETURN1 are macros defined outside this
-   chunk; SAVE_RESULT presumably loads the return value into %eax.  */
- .p2align 4
-L(Exit15):
- movlpd (%ecx), %xmm0
- movlpd 7(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 7(%edx)
- SAVE_RESULT (14)
-# ifdef USE_AS_STRNCPY
- sub $15, %ebx
- lea 15(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-
-CFI_POP (%edi)
-
-# ifdef USE_AS_STRNCPY
- .p2align 4
-L(Fill0): /* nothing left to store */
- RETURN
-/* L(Fill1)..L(Fill16): store N bytes at (%ecx) from %dl/%dx/%edx/%xmm0.
-   L(StrncpyFillTailWithZero) clears %edx and %xmm0 before dispatching
-   here, so these write N zero bytes.  Sizes that are not 1, 2, 4, 8 or 16
-   are built from two stores, overlapping where needed.
-   NOTE(review): RETURN is a macro defined outside this chunk.  */
- .p2align 4
-L(Fill1):
- movb %dl, (%ecx)
- RETURN
-
- .p2align 4
-L(Fill2):
- movw %dx, (%ecx)
- RETURN
-
- .p2align 4
-L(Fill3):
- movw %dx, (%ecx)
- movb %dl, 2(%ecx)
- RETURN
-
- .p2align 4
-L(Fill4):
- movl %edx, (%ecx)
- RETURN
-
- .p2align 4
-L(Fill5):
- movl %edx, (%ecx)
- movb %dl, 4(%ecx)
- RETURN
-
- .p2align 4
-L(Fill6):
- movl %edx, (%ecx)
- movw %dx, 4(%ecx)
- RETURN
-/* 7 bytes: two 4-byte stores overlapping at offset 3.  */
- .p2align 4
-L(Fill7):
- movl %edx, (%ecx)
- movl %edx, 3(%ecx)
- RETURN
-
- .p2align 4
-L(Fill8):
- movlpd %xmm0, (%ecx)
- RETURN
-
- .p2align 4
-L(Fill9):
- movlpd %xmm0, (%ecx)
- movb %dl, 8(%ecx)
- RETURN
-
- .p2align 4
-L(Fill10):
- movlpd %xmm0, (%ecx)
- movw %dx, 8(%ecx)
- RETURN
-/* 11 bytes: 8-byte store plus a 4-byte store at offset 7.  */
- .p2align 4
-L(Fill11):
- movlpd %xmm0, (%ecx)
- movl %edx, 7(%ecx)
- RETURN
-
- .p2align 4
-L(Fill12):
- movlpd %xmm0, (%ecx)
- movl %edx, 8(%ecx)
- RETURN
-/* 13..15 bytes: two 8-byte stores, the second at offset N - 8.  */
- .p2align 4
-L(Fill13):
- movlpd %xmm0, (%ecx)
- movlpd %xmm0, 5(%ecx)
- RETURN
-
- .p2align 4
-L(Fill14):
- movlpd %xmm0, (%ecx)
- movlpd %xmm0, 6(%ecx)
- RETURN
-
- .p2align 4
-L(Fill15):
- movlpd %xmm0, (%ecx)
- movlpd %xmm0, 7(%ecx)
- RETURN
-
- .p2align 4
-L(Fill16):
- movlpd %xmm0, (%ecx)
- movlpd %xmm0, 8(%ecx)
- RETURN
-/* L(StrncpyFillExit1) adds back the 16 that the caller subtracted from
-   %ebx and falls into L(FillFrom1To16Bytes), which jumps to L(FillN) for
-   N = %ebx in 0..16.  The compare tree tests 0, 16, 8 and 4 first and
-   then splits each remaining range with a single cmp whose flags feed
-   the jl/je/jg that follow it.  */
- .p2align 4
-L(StrncpyFillExit1):
- lea 16(%ebx), %ebx
-L(FillFrom1To16Bytes):
- test %ebx, %ebx
- jz L(Fill0)
- cmp $16, %ebx
- je L(Fill16)
- cmp $8, %ebx
- je L(Fill8)
- jg L(FillMore8)
- cmp $4, %ebx
- je L(Fill4)
- jg L(FillMore4)
- cmp $2, %ebx
- jl L(Fill1)
- je L(Fill2)
- jg L(Fill3)
-L(FillMore8): /* but less than 16 */
- cmp $12, %ebx
- je L(Fill12)
- jl L(FillLess12)
- cmp $14, %ebx
- jl L(Fill13)
- je L(Fill14)
- jg L(Fill15)
-L(FillMore4): /* but less than 8 */
- cmp $6, %ebx
- jl L(Fill5)
- je L(Fill6)
- jg L(Fill7)
-L(FillLess12): /* but more than 8 */
- cmp $10, %ebx
- jl L(Fill9)
- je L(Fill10)
- jmp L(Fill11)
-
- CFI_PUSH(%edi)
-
- .p2align 4
-L(StrncpyFillTailWithZero1):
- POP (%edi)
-L(StrncpyFillTailWithZero):
- pxor %xmm0, %xmm0
- xor %edx, %edx
- sub $16, %ebx
- jbe L(StrncpyFillExit1)
-
- movlpd %xmm0, (%ecx)
- movlpd %xmm0, 8(%ecx)
-
- lea 16(%ecx), %ecx
-
- mov %ecx, %edx
- and $0xf, %edx
- sub %edx, %ecx
- add %edx, %ebx
- xor %edx, %edx
- sub $64, %ebx
- jb L(StrncpyFillLess64)
-
-L(StrncpyFillLoopMovdqa):
- movdqa %xmm0, (%ecx)
- movdqa %xmm0, 16(%ecx)
- movdqa %xmm0, 32(%ecx)
- movdqa %xmm0, 48(%ecx)
- lea 64(%ecx), %ecx
- sub $64, %ebx
- jae L(StrncpyFillLoopMovdqa)
-
-L(StrncpyFillLess64):
- add $32, %ebx
- jl L(StrncpyFillLess32)
- movdqa %xmm0, (%ecx)
- movdqa %xmm0, 16(%ecx)
- lea 32(%ecx), %ecx
- sub $16, %ebx
- jl L(StrncpyFillExit1)
- movdqa %xmm0, (%ecx)
- lea 16(%ecx), %ecx
- jmp L(FillFrom1To16Bytes)
-
-L(StrncpyFillLess32):
- add $16, %ebx
- jl L(StrncpyFillExit1)
- movdqa %xmm0, (%ecx)
- lea 16(%ecx), %ecx
- jmp L(FillFrom1To16Bytes)
-# endif
-
- .p2align 4
-L(ExitTail1):
- movb (%ecx), %al
- movb %al, (%edx)
- SAVE_RESULT_TAIL (0)
-# ifdef USE_AS_STRNCPY
- sub $1, %ebx
- lea 1(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail2):
- movw (%ecx), %ax
- movw %ax, (%edx)
- SAVE_RESULT_TAIL (1)
-# ifdef USE_AS_STRNCPY
- sub $2, %ebx
- lea 2(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail3):
- movw (%ecx), %ax
- movw %ax, (%edx)
- movb 2(%ecx), %al
- movb %al, 2(%edx)
- SAVE_RESULT_TAIL (2)
-# ifdef USE_AS_STRNCPY
- sub $3, %ebx
- lea 3(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail4):
- movl (%ecx), %eax
- movl %eax, (%edx)
- SAVE_RESULT_TAIL (3)
-# ifdef USE_AS_STRNCPY
- sub $4, %ebx
- lea 4(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail5):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movb 4(%ecx), %al
- movb %al, 4(%edx)
- SAVE_RESULT_TAIL (4)
-# ifdef USE_AS_STRNCPY
- sub $5, %ebx
- lea 5(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail6):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movw 4(%ecx), %ax
- movw %ax, 4(%edx)
- SAVE_RESULT_TAIL (5)
-# ifdef USE_AS_STRNCPY
- sub $6, %ebx
- lea 6(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail7):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl 3(%ecx), %eax
- movl %eax, 3(%edx)
- SAVE_RESULT_TAIL (6)
-# ifdef USE_AS_STRNCPY
- sub $7, %ebx
- lea 7(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail8):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- SAVE_RESULT_TAIL (7)
-# ifdef USE_AS_STRNCPY
- sub $8, %ebx
- lea 8(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail9):
- movlpd (%ecx), %xmm0
- movb 8(%ecx), %al
- movlpd %xmm0, (%edx)
- movb %al, 8(%edx)
- SAVE_RESULT_TAIL (8)
-# ifdef USE_AS_STRNCPY
- sub $9, %ebx
- lea 9(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail10):
- movlpd (%ecx), %xmm0
- movw 8(%ecx), %ax
- movlpd %xmm0, (%edx)
- movw %ax, 8(%edx)
- SAVE_RESULT_TAIL (9)
-# ifdef USE_AS_STRNCPY
- sub $10, %ebx
- lea 10(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail11):
- movlpd (%ecx), %xmm0
- movl 7(%ecx), %eax
- movlpd %xmm0, (%edx)
- movl %eax, 7(%edx)
- SAVE_RESULT_TAIL (10)
-# ifdef USE_AS_STRNCPY
- sub $11, %ebx
- lea 11(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail12):
- movlpd (%ecx), %xmm0
- movl 8(%ecx), %eax
- movlpd %xmm0, (%edx)
- movl %eax, 8(%edx)
- SAVE_RESULT_TAIL (11)
-# ifdef USE_AS_STRNCPY
- sub $12, %ebx
- lea 12(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail13):
- movlpd (%ecx), %xmm0
- movlpd 5(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 5(%edx)
- SAVE_RESULT_TAIL (12)
-# ifdef USE_AS_STRNCPY
- sub $13, %ebx
- lea 13(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail14):
- movlpd (%ecx), %xmm0
- movlpd 6(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 6(%edx)
- SAVE_RESULT_TAIL (13)
-# ifdef USE_AS_STRNCPY
- sub $14, %ebx
- lea 14(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail15):
- movlpd (%ecx), %xmm0
- movlpd 7(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 7(%edx)
- SAVE_RESULT_TAIL (14)
-# ifdef USE_AS_STRNCPY
- sub $15, %ebx
- lea 15(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# endif
- RETURN
-
- .p2align 4
-L(ExitTail16):
- movdqu (%ecx), %xmm0
- movdqu %xmm0, (%edx)
- SAVE_RESULT_TAIL (15)
-# ifdef USE_AS_STRNCPY
- sub $16, %ebx
- lea 16(%edx), %ecx
- jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN
-# endif
-
-# ifdef USE_AS_STRNCPY
-# ifndef USE_AS_STRCAT
- CFI_PUSH (%esi)
- CFI_PUSH (%edi)
-# endif
- .p2align 4
-L(StrncpyLeaveCase2OrCase3):
- test %eax, %eax
- jnz L(Aligned64LeaveCase2)
-
-L(Aligned64LeaveCase3):
- add $48, %ebx
- jle L(CopyFrom1To16BytesCase3)
- movaps %xmm4, -64(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase3)
- movaps %xmm5, -48(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase3)
- movaps %xmm6, -32(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
- jmp L(CopyFrom1To16BytesCase3)
-
-L(Aligned64LeaveCase2):
- pcmpeqb %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- add $48, %ebx
- jle L(CopyFrom1To16BytesCase2OrCase3)
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm5, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm4, -64(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm6, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm5, -48(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm6, -32(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
- jmp L(CopyFrom1To16BytesCase2)
-
-/*--------------------------------------------------*/
- .p2align 4
-L(StrncpyExit1Case2OrCase3):
- movlpd (%ecx), %xmm0
- movlpd 7(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 7(%edx)
- mov $15, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit2Case2OrCase3):
- movlpd (%ecx), %xmm0
- movlpd 6(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 6(%edx)
- mov $14, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit3Case2OrCase3):
- movlpd (%ecx), %xmm0
- movlpd 5(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 5(%edx)
- mov $13, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit4Case2OrCase3):
- movlpd (%ecx), %xmm0
- movl 8(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 8(%edx)
- mov $12, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit5Case2OrCase3):
- movlpd (%ecx), %xmm0
- movl 7(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 7(%edx)
- mov $11, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit6Case2OrCase3):
- movlpd (%ecx), %xmm0
- movl 6(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 6(%edx)
- mov $10, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit7Case2OrCase3):
- movlpd (%ecx), %xmm0
- movl 5(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 5(%edx)
- mov $9, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit8Case2OrCase3):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- mov $8, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit9Case2OrCase3):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- mov $7, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit10Case2OrCase3):
- movlpd -1(%ecx), %xmm0
- movlpd %xmm0, -1(%edx)
- mov $6, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit11Case2OrCase3):
- movlpd -2(%ecx), %xmm0
- movlpd %xmm0, -2(%edx)
- mov $5, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit12Case2OrCase3):
- movl (%ecx), %esi
- movl %esi, (%edx)
- mov $4, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit13Case2OrCase3):
- movl -1(%ecx), %esi
- movl %esi, -1(%edx)
- mov $3, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit14Case2OrCase3):
- movl -2(%ecx), %esi
- movl %esi, -2(%edx)
- mov $2, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
- .p2align 4
-L(StrncpyExit15Case2OrCase3):
- movl -3(%ecx), %esi
- movl %esi, -3(%edx)
- mov $1, %esi
- test %eax, %eax
- jnz L(CopyFrom1To16BytesCase2)
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave1):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit1)
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 31(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit1)
- palignr $1, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit1)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit1)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit1):
- lea 15(%edx, %esi), %edx
- lea 15(%ecx, %esi), %ecx
- movdqu -16(%ecx), %xmm0
- xor %esi, %esi
- movdqu %xmm0, -16(%edx)
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave2):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit2)
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 30(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit2)
- palignr $2, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit2)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit2)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit2):
- lea 14(%edx, %esi), %edx
- lea 14(%ecx, %esi), %ecx
- movdqu -16(%ecx), %xmm0
- xor %esi, %esi
- movdqu %xmm0, -16(%edx)
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave3):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit3)
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 29(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit3)
- palignr $3, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit3)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit3)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit3):
- lea 13(%edx, %esi), %edx
- lea 13(%ecx, %esi), %ecx
- movdqu -16(%ecx), %xmm0
- xor %esi, %esi
- movdqu %xmm0, -16(%edx)
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave4):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit4)
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 28(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit4)
- palignr $4, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit4)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit4)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit4):
- lea 12(%edx, %esi), %edx
- lea 12(%ecx, %esi), %ecx
- movlpd -12(%ecx), %xmm0
- movl -4(%ecx), %eax
- movlpd %xmm0, -12(%edx)
- movl %eax, -4(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave5):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit5)
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 27(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit5)
- palignr $5, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit5)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit5)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit5):
- lea 11(%edx, %esi), %edx
- lea 11(%ecx, %esi), %ecx
- movlpd -11(%ecx), %xmm0
- movl -4(%ecx), %eax
- movlpd %xmm0, -11(%edx)
- movl %eax, -4(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave6):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit6)
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 26(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit6)
- palignr $6, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit6)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit6)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit6):
- lea 10(%edx, %esi), %edx
- lea 10(%ecx, %esi), %ecx
-
- movlpd -10(%ecx), %xmm0
- movw -2(%ecx), %ax
- movlpd %xmm0, -10(%edx)
- movw %ax, -2(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave7):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit7)
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 25(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit7)
- palignr $7, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit7)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit7)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit7):
- lea 9(%edx, %esi), %edx
- lea 9(%ecx, %esi), %ecx
-
- movlpd -9(%ecx), %xmm0
- movb -1(%ecx), %ah
- movlpd %xmm0, -9(%edx)
- movb %ah, -1(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave8):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit8)
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 24(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit8)
- palignr $8, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit8)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit8)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit8):
- lea 8(%edx, %esi), %edx
- lea 8(%ecx, %esi), %ecx
- movlpd -8(%ecx), %xmm0
- movlpd %xmm0, -8(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave9):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit9)
- palignr $9, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 23(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit9)
- palignr $9, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit9)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit9)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit9):
- lea 7(%edx, %esi), %edx
- lea 7(%ecx, %esi), %ecx
-
- movlpd -8(%ecx), %xmm0
- movlpd %xmm0, -8(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave10):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit10)
- palignr $10, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 22(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit10)
- palignr $10, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit10)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit10)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit10):
- lea 6(%edx, %esi), %edx
- lea 6(%ecx, %esi), %ecx
-
- movlpd -8(%ecx), %xmm0
- movlpd %xmm0, -8(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave11):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit11)
- palignr $11, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 21(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit11)
- palignr $11, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit11)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit11)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit11):
- lea 5(%edx, %esi), %edx
- lea 5(%ecx, %esi), %ecx
- movl -5(%ecx), %esi
- movb -1(%ecx), %ah
- movl %esi, -5(%edx)
- movb %ah, -1(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave12):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit12)
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 20(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit12)
- palignr $12, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit12)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit12)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit12):
- lea 4(%edx, %esi), %edx
- lea 4(%ecx, %esi), %ecx
- movl -4(%ecx), %eax
- movl %eax, -4(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave13):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit13)
- palignr $13, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 19(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit13)
- palignr $13, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit13)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit13)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit13):
- lea 3(%edx, %esi), %edx
- lea 3(%ecx, %esi), %ecx
-
- movl -4(%ecx), %eax
- movl %eax, -4(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave14):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit14)
- palignr $14, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 18(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit14)
- palignr $14, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit14)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit14)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit14):
- lea 2(%edx, %esi), %edx
- lea 2(%ecx, %esi), %ecx
- movw -2(%ecx), %ax
- movw %ax, -2(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-
-L(StrncpyLeave15):
- movaps %xmm2, %xmm3
- add $48, %ebx
- jle L(StrncpyExit15)
- palignr $15, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 17(%ecx), %xmm2
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit15)
- palignr $15, %xmm3, %xmm2
- movaps %xmm2, 16(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit15)
- movaps %xmm4, 32(%edx)
- lea 16(%esi), %esi
- sub $16, %ebx
- jbe L(StrncpyExit15)
- movaps %xmm5, 48(%edx)
- lea 16(%esi), %esi
- lea -16(%ebx), %ebx
-L(StrncpyExit15):
- lea 1(%edx, %esi), %edx
- lea 1(%ecx, %esi), %ecx
- movb -1(%ecx), %ah
- movb %ah, -1(%edx)
- xor %esi, %esi
- jmp L(CopyFrom1To16BytesCase3)
-# endif
-
-# ifndef USE_AS_STRCAT
-# ifdef USE_AS_STRNCPY
- CFI_POP (%esi)
- CFI_POP (%edi)
-
- .p2align 4
-L(ExitTail0):
- movl %edx, %eax
- RETURN
-
- .p2align 4
-L(StrncpyExit15Bytes):
- cmp $12, %ebx
- jbe L(StrncpyExit12Bytes)
- cmpb $0, 8(%ecx)
- jz L(ExitTail9)
- cmpb $0, 9(%ecx)
- jz L(ExitTail10)
- cmpb $0, 10(%ecx)
- jz L(ExitTail11)
- cmpb $0, 11(%ecx)
- jz L(ExitTail12)
- cmp $13, %ebx
- je L(ExitTail13)
- cmpb $0, 12(%ecx)
- jz L(ExitTail13)
- cmp $14, %ebx
- je L(ExitTail14)
- cmpb $0, 13(%ecx)
- jz L(ExitTail14)
- movlpd (%ecx), %xmm0
- movlpd 7(%ecx), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
-# else
- movl %edx, %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit12Bytes):
- cmp $9, %ebx
- je L(ExitTail9)
- cmpb $0, 8(%ecx)
- jz L(ExitTail9)
- cmp $10, %ebx
- je L(ExitTail10)
- cmpb $0, 9(%ecx)
- jz L(ExitTail10)
- cmp $11, %ebx
- je L(ExitTail11)
- cmpb $0, 10(%ecx)
- jz L(ExitTail11)
- movlpd (%ecx), %xmm0
- movl 8(%ecx), %eax
- movlpd %xmm0, (%edx)
- movl %eax, 8(%edx)
- SAVE_RESULT_TAIL (11)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit8Bytes):
- cmp $4, %ebx
- jbe L(StrncpyExit4Bytes)
- cmpb $0, (%ecx)
- jz L(ExitTail1)
- cmpb $0, 1(%ecx)
- jz L(ExitTail2)
- cmpb $0, 2(%ecx)
- jz L(ExitTail3)
- cmpb $0, 3(%ecx)
- jz L(ExitTail4)
-
- cmp $5, %ebx
- je L(ExitTail5)
- cmpb $0, 4(%ecx)
- jz L(ExitTail5)
- cmp $6, %ebx
- je L(ExitTail6)
- cmpb $0, 5(%ecx)
- jz L(ExitTail6)
- cmp $7, %ebx
- je L(ExitTail7)
- cmpb $0, 6(%ecx)
- jz L(ExitTail7)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
- cmpb $1, (%eax)
- sbb $-1, %eax
-# else
- movl %edx, %eax
-# endif
- RETURN
-
- .p2align 4
-L(StrncpyExit4Bytes):
- test %ebx, %ebx
- jz L(ExitTail0)
- cmp $1, %ebx
- je L(ExitTail1)
- cmpb $0, (%ecx)
- jz L(ExitTail1)
- cmp $2, %ebx
- je L(ExitTail2)
- cmpb $0, 1(%ecx)
- jz L(ExitTail2)
- cmp $3, %ebx
- je L(ExitTail3)
- cmpb $0, 2(%ecx)
- jz L(ExitTail3)
- movl (%ecx), %eax
- movl %eax, (%edx)
- SAVE_RESULT_TAIL (3)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
- RETURN
-# endif
-
-END (STRCPY)
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcpy.S b/sysdeps/i386/i686/multiarch/strcpy.S
deleted file mode 100644
index ffbc03c6d5..0000000000
--- a/sysdeps/i386/i686/multiarch/strcpy.S
+++ /dev/null
@@ -1,116 +0,0 @@
-/* Multiple versions of strcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY)
-# ifndef STRCPY
-# define STRCPY strcpy
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-# define STRCPY_SSSE3 __stpncpy_ssse3
-# define STRCPY_SSE2 __stpncpy_sse2
-# define STRCPY_IA32 __stpncpy_ia32
-# define __GI_STRCPY __GI_stpncpy
-# define __GI___STRCPY __GI___stpncpy
-# else
-# define STRCPY_SSSE3 __stpcpy_ssse3
-# define STRCPY_SSE2 __stpcpy_sse2
-# define STRCPY_IA32 __stpcpy_ia32
-# define __GI_STRCPY __GI_stpcpy
-# define __GI___STRCPY __GI___stpcpy
-# endif
-#else
-# ifdef USE_AS_STRNCPY
-# define STRCPY_SSSE3 __strncpy_ssse3
-# define STRCPY_SSE2 __strncpy_sse2
-# define STRCPY_IA32 __strncpy_ia32
-# define __GI_STRCPY __GI_strncpy
-# else
-# define STRCPY_SSSE3 __strcpy_ssse3
-# define STRCPY_SSE2 __strcpy_sse2
-# define STRCPY_IA32 __strcpy_ia32
-# define __GI_STRCPY __GI_strcpy
-# endif
-#endif
-
-
-/* Define multiple versions only for the definition in libc. Don't
- define multiple versions for strncpy in static library since we
- need strncpy before initialization has happened. */
-#if IS_IN (libc)
-
- .text
-ENTRY(STRCPY) /* IFUNC selector: leaves the chosen strcpy-family implementation in %eax and returns. */
- .type STRCPY, @gnu_indirect_function /* mark STRCPY as a GNU indirect function symbol */
- LOAD_GOT_AND_RTLD_GLOBAL_RO /* macro defined elsewhere; presumably sets up GOT access for the checks below -- TODO confirm */
- LOAD_FUNC_GOT_EAX (STRCPY_IA32) /* default choice: the IA32 version (macro presumably loads its address into %eax) */
- HAS_CPU_FEATURE (SSE2)
- jz 2f /* SSE2 test came out zero: keep the IA32 version */
- LOAD_FUNC_GOT_EAX (STRCPY_SSE2) /* switch the choice to the SSE2 version */
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- jnz 2f /* Fast_Unaligned_Load test non-zero: keep the SSE2 version */
- HAS_CPU_FEATURE (SSSE3)
- jz 2f /* SSSE3 test came out zero: keep the SSE2 version */
- LOAD_FUNC_GOT_EAX (STRCPY_SSSE3) /* otherwise choose the SSSE3 version */
-2: ret /* common exit for every selection path */
-END(STRCPY)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCPY_IA32, @function; \
- .align 16; \
- .globl STRCPY_IA32; \
- .hidden STRCPY_IA32; \
- STRCPY_IA32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCPY_IA32, .-STRCPY_IA32
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcpy calls through a PLT.
- The speedup we get from using SSSE3 instructions is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCPY; __GI_STRCPY = STRCPY_IA32
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- .globl __GI___STRCPY; __GI___STRCPY = STRCPY_IA32
-
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-# include "../../stpncpy.S"
-# else
-# include "../../i586/stpcpy.S"
-# endif
-#else
-# ifndef USE_AS_STRNCPY
-# include "../../i586/strcpy.S"
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strcspn-c.c b/sysdeps/i386/i686/multiarch/strcspn-c.c
deleted file mode 100644
index 6d61e190a8..0000000000
--- a/sysdeps/i386/i686/multiarch/strcspn-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __strcspn_sse2 __strcspn_ia32
-#include <sysdeps/x86_64/multiarch/strcspn-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strcspn.S b/sysdeps/i386/i686/multiarch/strcspn.S
deleted file mode 100644
index 21e5093924..0000000000
--- a/sysdeps/i386/i686/multiarch/strcspn.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Multiple versions of strcspn
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRPBRK
-#define STRCSPN_SSE42 __strpbrk_sse42
-#define STRCSPN_IA32 __strpbrk_ia32
-#define __GI_STRCSPN __GI_strpbrk
-#else
-#ifndef STRCSPN
-#define STRCSPN strcspn
-#define STRCSPN_SSE42 __strcspn_sse42
-#define STRCSPN_IA32 __strcspn_ia32
-#define __GI_STRCSPN __GI_strcspn
-#endif
-#endif
-
-/* Define multiple versions only for the definition in libc. Don't
- define multiple versions for strpbrk in static library since we
- need strpbrk before initialization has happened. */
-#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc)
- .text
-ENTRY(STRCSPN) /* IFUNC selector: leaves the chosen strcspn/strpbrk implementation in %eax and returns. */
- .type STRCSPN, @gnu_indirect_function /* mark STRCSPN as a GNU indirect function symbol */
- LOAD_GOT_AND_RTLD_GLOBAL_RO /* macro defined elsewhere; presumably sets up GOT access for the check below -- TODO confirm */
- LOAD_FUNC_GOT_EAX (STRCSPN_IA32) /* default choice: the IA32 version (macro presumably loads its address into %eax) */
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f /* SSE4_2 test came out zero: keep the IA32 version */
- LOAD_FUNC_GOT_EAX (STRCSPN_SSE42) /* otherwise choose the SSE4.2 version */
-2: ret /* common exit for both selection paths */
-END(STRCSPN)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type STRCSPN_IA32, @function; \
- .globl STRCSPN_IA32; \
- .p2align 4; \
- STRCSPN_IA32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size STRCSPN_IA32, .-STRCSPN_IA32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_STRCSPN; __GI_STRCSPN = STRCSPN_IA32
-#endif
-
-#ifdef USE_AS_STRPBRK
-#include "../../strpbrk.S"
-#else
-#include "../../strcspn.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
deleted file mode 100644
index d3ea864bab..0000000000
--- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
+++ /dev/null
@@ -1,125 +0,0 @@
-/* strlen with SSE2 and BSF
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if defined SHARED && IS_IN (libc)
-
-#include <sysdep.h>
-
-#define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-#define PARMS 4 + 8 /* Preserve ESI and EDI. */
-#define STR PARMS
-#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state
-#define RETURN POP (%edi); POP (%esi); ret; \
- cfi_restore_state; cfi_remember_state
-
- .text
-ENTRY ( __strlen_sse2_bsf) /* strlen: string pointer read from STR(%esp), length returned in %eax; scans 16 bytes at a time with SSE2 and locates the NUL with BSF. */
- ENTRANCE /* push %esi and %edi (restored by RETURN) */
- mov STR(%esp), %edi /* %edi = string pointer argument */
- xor %eax, %eax /* %eax = 0: base offset for the L(exit_less16) path */
- mov %edi, %ecx
- and $0x3f, %ecx /* %ecx = low 6 bits of the pointer (offset within a 64-byte block) */
- pxor %xmm0, %xmm0 /* %xmm0 = all zero bytes, the pattern to compare against */
- cmp $0x30, %ecx
- ja L(next) /* offset > 0x30: skip the 16-byte unaligned load (presumably so it cannot cross a 64-byte boundary) */
- movdqu (%edi), %xmm1 /* unaligned load of the first 16 bytes of the string */
- pcmpeqb %xmm1, %xmm0 /* bytes equal to zero become 0xff in %xmm0 */
- pmovmskb %xmm0, %edx /* %edx = one bit per byte, set where a NUL was found */
- test %edx, %edx
- jnz L(exit_less16) /* NUL within the first 16 bytes */
- mov %edi, %eax
- and $-16, %eax /* %eax = string pointer rounded down to a 16-byte boundary */
- jmp L(align16_start)
-L(next):
-/* Aligned first probe: compare the 16-byte block containing the string start, then ignore matches that lie before the start. */
- mov %edi, %eax
- and $-16, %eax /* %eax = string pointer rounded down to a 16-byte boundary */
- pcmpeqb (%eax), %xmm0 /* compare the aligned block against zero */
- mov $-1, %esi
- sub %eax, %ecx /* low 5 bits of %ecx now equal the pointer's offset within its 16-byte block */
- shl %cl, %esi /* %esi = mask with the bits for bytes before the string start cleared */
- pmovmskb %xmm0, %edx
- and %esi, %edx /* drop NUL matches that precede the string */
- jnz L(exit)
-L(align16_start):
- pxor %xmm0, %xmm0 /* clear the four compare registers used by the loop */
- pxor %xmm1, %xmm1
- pxor %xmm2, %xmm2
- pxor %xmm3, %xmm3
- .p2align 4
-L(align16_loop): /* checks the four 16-byte blocks at %eax+16 .. %eax+64 per iteration */
- pcmpeqb 16(%eax), %xmm0 /* a block with no NUL leaves the register all zero, so it is reusable next iteration */
- pmovmskb %xmm0, %edx
- test %edx, %edx
- jnz L(exit16)
-
- pcmpeqb 32(%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- jnz L(exit32)
-
- pcmpeqb 48(%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- jnz L(exit48)
-
- pcmpeqb 64(%eax), %xmm3
- pmovmskb %xmm3, %edx
- lea 64(%eax), %eax /* advance to the block just tested; lea leaves the flags untouched */
- test %edx, %edx
- jz L(align16_loop)
-L(exit): /* NUL is in the 16-byte block at %eax */
- sub %edi, %eax /* %eax = offset of that block relative to the string start (may be negative when coming from L(next)) */
-L(exit_less16):
- bsf %edx, %edx /* index of the lowest set bit = position of the first NUL in the block */
- add %edx, %eax /* length = block offset + index within block */
- RETURN
-L(exit16): /* NUL is in the block at %eax + 16 */
- sub %edi, %eax
- bsf %edx, %edx
- add %edx, %eax
- add $16, %eax
- RETURN
-L(exit32): /* NUL is in the block at %eax + 32 */
- sub %edi, %eax
- bsf %edx, %edx
- add %edx, %eax
- add $32, %eax
- RETURN
-L(exit48): /* NUL is in the block at %eax + 48 */
- sub %edi, %eax
- bsf %edx, %edx
- add %edx, %eax
- add $48, %eax
- POP (%edi) /* same register restore as RETURN, written out without the trailing CFI state restore */
- POP (%esi)
- ret
-
-END ( __strlen_sse2_bsf)
-
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
deleted file mode 100644
index 36fc1469d0..0000000000
--- a/sysdeps/i386/i686/multiarch/strlen-sse2.S
+++ /dev/null
@@ -1,695 +0,0 @@
-/* strlen with SSE2
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* for strlen only SHARED version is optimized, for strcat, strncat, strnlen both STATIC and SHARED are optimized */
-
-#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && IS_IN (libc)
-
-# ifndef USE_AS_STRCAT
-
-# include <sysdep.h>
-# define PARMS 4
-# define STR PARMS
-# define RETURN ret
-
-# ifdef USE_AS_STRNLEN
-# define LEN PARMS + 8
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-# undef RETURN
-# define RETURN POP (%edi); CFI_PUSH(%edi); ret
-# endif
-
-# ifndef STRLEN
-# define STRLEN __strlen_sse2
-# endif
-
- atom_text_section
-ENTRY (STRLEN)
- mov STR(%esp), %edx
-# ifdef USE_AS_STRNLEN
- PUSH (%edi)
- movl LEN(%esp), %edi
- sub $4, %edi
- jbe L(len_less4_prolog)
-# endif
-# endif
- xor %eax, %eax
- cmpb $0, (%edx)
- jz L(exit_tail0)
- cmpb $0, 1(%edx)
- jz L(exit_tail1)
- cmpb $0, 2(%edx)
- jz L(exit_tail2)
- cmpb $0, 3(%edx)
- jz L(exit_tail3)
-
-# ifdef USE_AS_STRNLEN
- sub $4, %edi
- jbe L(len_less8_prolog)
-# endif
-
- cmpb $0, 4(%edx)
- jz L(exit_tail4)
- cmpb $0, 5(%edx)
- jz L(exit_tail5)
- cmpb $0, 6(%edx)
- jz L(exit_tail6)
- cmpb $0, 7(%edx)
- jz L(exit_tail7)
-
-# ifdef USE_AS_STRNLEN
- sub $4, %edi
- jbe L(len_less12_prolog)
-# endif
-
- cmpb $0, 8(%edx)
- jz L(exit_tail8)
- cmpb $0, 9(%edx)
- jz L(exit_tail9)
- cmpb $0, 10(%edx)
- jz L(exit_tail10)
- cmpb $0, 11(%edx)
- jz L(exit_tail11)
-
-# ifdef USE_AS_STRNLEN
- sub $4, %edi
- jbe L(len_less16_prolog)
-# endif
-
- cmpb $0, 12(%edx)
- jz L(exit_tail12)
- cmpb $0, 13(%edx)
- jz L(exit_tail13)
- cmpb $0, 14(%edx)
- jz L(exit_tail14)
- cmpb $0, 15(%edx)
- jz L(exit_tail15)
-
- pxor %xmm0, %xmm0
- lea 16(%edx), %eax
- mov %eax, %ecx
- and $-16, %eax
-
-# ifdef USE_AS_STRNLEN
- and $15, %edx
- add %edx, %edi
- sub $64, %edi
- jbe L(len_less64)
-# endif
-
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- pxor %xmm1, %xmm1
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edx
- pxor %xmm2, %xmm2
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm2
- pmovmskb %xmm2, %edx
- pxor %xmm3, %xmm3
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
-# ifdef USE_AS_STRNLEN
- sub $64, %edi
- jbe L(len_less64)
-# endif
-
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
-# ifdef USE_AS_STRNLEN
- sub $64, %edi
- jbe L(len_less64)
-# endif
-
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
-# ifdef USE_AS_STRNLEN
- sub $64, %edi
- jbe L(len_less64)
-# endif
-
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqb (%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
-# ifdef USE_AS_STRNLEN
- mov %eax, %edx
- and $63, %edx
- add %edx, %edi
-# endif
-
- and $-0x40, %eax
-
- .p2align 4
-L(aligned_64_loop):
-# ifdef USE_AS_STRNLEN
- sub $64, %edi
- jbe L(len_less64)
-# endif
- movaps (%eax), %xmm0
- movaps 16(%eax), %xmm1
- movaps 32(%eax), %xmm2
- movaps 48(%eax), %xmm6
- pminub %xmm1, %xmm0
- pminub %xmm6, %xmm2
- pminub %xmm0, %xmm2
- pcmpeqb %xmm3, %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 64(%eax), %eax
- jz L(aligned_64_loop)
-
- pcmpeqb -64(%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 48(%ecx), %ecx
- jnz L(exit)
-
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea -16(%ecx), %ecx
- jnz L(exit)
-
- pcmpeqb -32(%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea -16(%ecx), %ecx
- jnz L(exit)
-
- pcmpeqb %xmm6, %xmm3
- pmovmskb %xmm3, %edx
- lea -16(%ecx), %ecx
-L(exit):
- sub %ecx, %eax
- test %dl, %dl
- jz L(exit_high)
-
- mov %dl, %cl
- and $15, %cl
- jz L(exit_8)
- test $0x01, %dl
- jnz L(exit_tail0)
- test $0x02, %dl
- jnz L(exit_tail1)
- test $0x04, %dl
- jnz L(exit_tail2)
- add $3, %eax
- RETURN
-
- .p2align 4
-L(exit_8):
- test $0x10, %dl
- jnz L(exit_tail4)
- test $0x20, %dl
- jnz L(exit_tail5)
- test $0x40, %dl
- jnz L(exit_tail6)
- add $7, %eax
- RETURN
-
- .p2align 4
-L(exit_high):
- mov %dh, %ch
- and $15, %ch
- jz L(exit_high_8)
- test $0x01, %dh
- jnz L(exit_tail8)
- test $0x02, %dh
- jnz L(exit_tail9)
- test $0x04, %dh
- jnz L(exit_tail10)
- add $11, %eax
- RETURN
-
- .p2align 4
-L(exit_high_8):
- test $0x10, %dh
- jnz L(exit_tail12)
- test $0x20, %dh
- jnz L(exit_tail13)
- test $0x40, %dh
- jnz L(exit_tail14)
- add $15, %eax
-L(exit_tail0):
- RETURN
-
-# ifdef USE_AS_STRNLEN
-
- .p2align 4
-L(len_less64):
- pxor %xmm0, %xmm0
- add $64, %edi
-
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- pxor %xmm1, %xmm1
- lea 16(%eax), %eax
- test %edx, %edx
- jnz L(strnlen_exit)
-
- sub $16, %edi
- jbe L(return_start_len)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edx
- lea 16(%eax), %eax
- test %edx, %edx
- jnz L(strnlen_exit)
-
- sub $16, %edi
- jbe L(return_start_len)
-
- pcmpeqb (%eax), %xmm0
- pmovmskb %xmm0, %edx
- lea 16(%eax), %eax
- test %edx, %edx
- jnz L(strnlen_exit)
-
- sub $16, %edi
- jbe L(return_start_len)
-
- pcmpeqb (%eax), %xmm1
- pmovmskb %xmm1, %edx
- lea 16(%eax), %eax
- test %edx, %edx
- jnz L(strnlen_exit)
-
- movl LEN(%esp), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit):
- sub %ecx, %eax
-
- test %dl, %dl
- jz L(strnlen_exit_high)
- mov %dl, %cl
- and $15, %cl
- jz L(strnlen_exit_8)
- test $0x01, %dl
- jnz L(exit_tail0)
- test $0x02, %dl
- jnz L(strnlen_exit_tail1)
- test $0x04, %dl
- jnz L(strnlen_exit_tail2)
- sub $4, %edi
- jb L(return_start_len)
- lea 3(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_8):
- test $0x10, %dl
- jnz L(strnlen_exit_tail4)
- test $0x20, %dl
- jnz L(strnlen_exit_tail5)
- test $0x40, %dl
- jnz L(strnlen_exit_tail6)
- sub $8, %edi
- jb L(return_start_len)
- lea 7(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_high):
- mov %dh, %ch
- and $15, %ch
- jz L(strnlen_exit_high_8)
- test $0x01, %dh
- jnz L(strnlen_exit_tail8)
- test $0x02, %dh
- jnz L(strnlen_exit_tail9)
- test $0x04, %dh
- jnz L(strnlen_exit_tail10)
- sub $12, %edi
- jb L(return_start_len)
- lea 11(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_high_8):
- test $0x10, %dh
- jnz L(strnlen_exit_tail12)
- test $0x20, %dh
- jnz L(strnlen_exit_tail13)
- test $0x40, %dh
- jnz L(strnlen_exit_tail14)
- sub $16, %edi
- jb L(return_start_len)
- lea 15(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail1):
- sub $2, %edi
- jb L(return_start_len)
- lea 1(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail2):
- sub $3, %edi
- jb L(return_start_len)
- lea 2(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail4):
- sub $5, %edi
- jb L(return_start_len)
- lea 4(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail5):
- sub $6, %edi
- jb L(return_start_len)
- lea 5(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail6):
- sub $7, %edi
- jb L(return_start_len)
- lea 6(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail8):
- sub $9, %edi
- jb L(return_start_len)
- lea 8(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail9):
- sub $10, %edi
- jb L(return_start_len)
- lea 9(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail10):
- sub $11, %edi
- jb L(return_start_len)
- lea 10(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail12):
- sub $13, %edi
- jb L(return_start_len)
- lea 12(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail13):
- sub $14, %edi
- jb L(return_start_len)
- lea 13(%eax), %eax
- RETURN
-
- .p2align 4
-L(strnlen_exit_tail14):
- sub $15, %edi
- jb L(return_start_len)
- lea 14(%eax), %eax
- RETURN
-
- .p2align 4
-L(return_start_len):
- movl LEN(%esp), %eax
- RETURN
-
-/* for prolog only */
-
- .p2align 4
-L(len_less4_prolog):
- xor %eax, %eax
-
- add $4, %edi
- jz L(exit_tail0)
-
- cmpb $0, (%edx)
- jz L(exit_tail0)
- cmp $1, %edi
- je L(exit_tail1)
-
- cmpb $0, 1(%edx)
- jz L(exit_tail1)
- cmp $2, %edi
- je L(exit_tail2)
-
- cmpb $0, 2(%edx)
- jz L(exit_tail2)
- cmp $3, %edi
- je L(exit_tail3)
-
- cmpb $0, 3(%edx)
- jz L(exit_tail3)
- mov $4, %eax
- RETURN
-
- .p2align 4
-L(len_less8_prolog):
- add $4, %edi
-
- cmpb $0, 4(%edx)
- jz L(exit_tail4)
- cmp $1, %edi
- je L(exit_tail5)
-
- cmpb $0, 5(%edx)
- jz L(exit_tail5)
- cmp $2, %edi
- je L(exit_tail6)
-
- cmpb $0, 6(%edx)
- jz L(exit_tail6)
- cmp $3, %edi
- je L(exit_tail7)
-
- cmpb $0, 7(%edx)
- jz L(exit_tail7)
- mov $8, %eax
- RETURN
-
-
- .p2align 4
-L(len_less12_prolog):
- add $4, %edi
-
- cmpb $0, 8(%edx)
- jz L(exit_tail8)
- cmp $1, %edi
- je L(exit_tail9)
-
- cmpb $0, 9(%edx)
- jz L(exit_tail9)
- cmp $2, %edi
- je L(exit_tail10)
-
- cmpb $0, 10(%edx)
- jz L(exit_tail10)
- cmp $3, %edi
- je L(exit_tail11)
-
- cmpb $0, 11(%edx)
- jz L(exit_tail11)
- mov $12, %eax
- RETURN
-
- .p2align 4
-L(len_less16_prolog):
- add $4, %edi
-
- cmpb $0, 12(%edx)
- jz L(exit_tail12)
- cmp $1, %edi
- je L(exit_tail13)
-
- cmpb $0, 13(%edx)
- jz L(exit_tail13)
- cmp $2, %edi
- je L(exit_tail14)
-
- cmpb $0, 14(%edx)
- jz L(exit_tail14)
- cmp $3, %edi
- je L(exit_tail15)
-
- cmpb $0, 15(%edx)
- jz L(exit_tail15)
- mov $16, %eax
- RETURN
-# endif
-
- .p2align 4
-L(exit_tail1):
- add $1, %eax
- RETURN
-
-L(exit_tail2):
- add $2, %eax
- RETURN
-
-L(exit_tail3):
- add $3, %eax
- RETURN
-
-L(exit_tail4):
- add $4, %eax
- RETURN
-
-L(exit_tail5):
- add $5, %eax
- RETURN
-
-L(exit_tail6):
- add $6, %eax
- RETURN
-
-L(exit_tail7):
- add $7, %eax
- RETURN
-
-L(exit_tail8):
- add $8, %eax
- RETURN
-
-L(exit_tail9):
- add $9, %eax
- RETURN
-
-L(exit_tail10):
- add $10, %eax
- RETURN
-
-L(exit_tail11):
- add $11, %eax
- RETURN
-
-L(exit_tail12):
- add $12, %eax
- RETURN
-
-L(exit_tail13):
- add $13, %eax
- RETURN
-
-L(exit_tail14):
- add $14, %eax
- RETURN
-
-L(exit_tail15):
- add $15, %eax
-# ifndef USE_AS_STRCAT
- RETURN
-END (STRLEN)
-# endif
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S
deleted file mode 100644
index 77cf6bcdb0..0000000000
--- a/sysdeps/i386/i686/multiarch/strlen.S
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Multiple versions of strlen
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc and for the
- DSO. In static binaries, we need strlen before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(strlen)
- .type strlen, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__strlen_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strlen_sse2_bsf)
- HAS_ARCH_FEATURE (Slow_BSF)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strlen_sse2)
-2: ret
-END(strlen)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strlen_ia32, @function; \
- .globl __strlen_ia32; \
- .p2align 4; \
- __strlen_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strlen_ia32, .-__strlen_ia32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strlen; __GI_strlen = __strlen_ia32
-#endif
-
-#include "../../i586/strlen.S"
diff --git a/sysdeps/i386/i686/multiarch/strncase-c.c b/sysdeps/i386/i686/multiarch/strncase-c.c
deleted file mode 100644
index 76581eb62b..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <string.h>
-
-extern __typeof (strncasecmp) __strncasecmp_nonascii;
-
-#define __strncasecmp __strncasecmp_nonascii
-#include <string/strncase.c>
-
-strong_alias (__strncasecmp_nonascii, __strncasecmp_ia32)
diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
deleted file mode 100644
index a56e63a566..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Entry point for multi-version x86 strncasecmp.
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
- .text
-ENTRY(__strncasecmp)
- .type __strncasecmp, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__strncasecmp_ia32)
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strncasecmp_ssse3)
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f
- HAS_ARCH_FEATURE (Slow_SSE4_2)
- jnz 2f
- LOAD_FUNC_GOT_EAX (__strncasecmp_sse4_2)
-2: ret
-END(__strncasecmp)
-
-weak_alias (__strncasecmp, strncasecmp)
diff --git a/sysdeps/i386/i686/multiarch/strncase_l-c.c b/sysdeps/i386/i686/multiarch/strncase_l-c.c
deleted file mode 100644
index 7e601af271..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l-c.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <string.h>
-
-extern __typeof (strncasecmp_l) __strncasecmp_l_nonascii;
-
-#define __strncasecmp_l __strncasecmp_l_nonascii
-#define USE_IN_EXTENDED_LOCALE_MODEL 1
-#include <string/strncase.c>
-
-strong_alias (__strncasecmp_l_nonascii, __strncasecmp_l_ia32)
-
-/* The needs of strcasecmp in libc are minimal, no need to go through
- the IFUNC. */
-strong_alias (__strncasecmp_l_nonascii, __GI___strncasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S b/sysdeps/i386/i686/multiarch/strncase_l-sse4.S
deleted file mode 100644
index 557210832e..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l-sse4.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRNCASECMP_L 1
-#include "strcmp-sse4.S"
diff --git a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S b/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S
deleted file mode 100644
index d438a1ae35..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l-ssse3.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define USE_AS_STRNCASECMP_L 1
-#include "strcmp-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncase_l.S b/sysdeps/i386/i686/multiarch/strncase_l.S
deleted file mode 100644
index 8a74ee8574..0000000000
--- a/sysdeps/i386/i686/multiarch/strncase_l.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* Multiple versions of strncasecmp_l
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCMP __strncasecmp_l
-#define USE_AS_STRNCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strncasecmp_l, strncasecmp_l)
diff --git a/sysdeps/i386/i686/multiarch/strncat-c.c b/sysdeps/i386/i686/multiarch/strncat-c.c
deleted file mode 100644
index 132a000545..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCAT __strncat_ia32
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
- __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
-#endif
-
-#include "string/strncat.c"
diff --git a/sysdeps/i386/i686/multiarch/strncat-sse2.S b/sysdeps/i386/i686/multiarch/strncat-sse2.S
deleted file mode 100644
index f1045b72b8..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat-sse2.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT __strncat_sse2
-#define USE_AS_STRNCAT
-
-#include "strcat-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat-ssse3.S b/sysdeps/i386/i686/multiarch/strncat-ssse3.S
deleted file mode 100644
index 625b90a978..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define STRCAT __strncat_ssse3
-#define USE_AS_STRNCAT
-
-#include "strcat-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncat.S b/sysdeps/i386/i686/multiarch/strncat.S
deleted file mode 100644
index 5c1bf41453..0000000000
--- a/sysdeps/i386/i686/multiarch/strncat.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncat
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/i386/i686/multiarch/strncmp-c.c b/sysdeps/i386/i686/multiarch/strncmp-c.c
deleted file mode 100644
index cc059da494..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifdef SHARED
-# define STRNCMP __strncmp_ia32
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
- __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32);
-#endif
-
-#include "string/strncmp.c"
diff --git a/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/sysdeps/i386/i686/multiarch/strncmp-sse4.S
deleted file mode 100644
index cf14dfaf6c..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp-sse4.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef SHARED
-# define USE_AS_STRNCMP
-# define STRCMP __strncmp_sse4_2
-# include "strcmp-sse4.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
deleted file mode 100644
index 536c8685f2..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef SHARED
-# define USE_AS_STRNCMP
-# define STRCMP __strncmp_ssse3
-# include "strcmp-ssse3.S"
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strncmp.S b/sysdeps/i386/i686/multiarch/strncmp.S
deleted file mode 100644
index 150d4786d2..0000000000
--- a/sysdeps/i386/i686/multiarch/strncmp.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncmp
- All versions must be listed in ifunc-impl-list.c. */
-#define USE_AS_STRNCMP
-#define STRCMP strncmp
-#include "strcmp.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-c.c b/sysdeps/i386/i686/multiarch/strncpy-c.c
deleted file mode 100644
index 201e3f98b3..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-c.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define STRNCPY __strncpy_ia32
-#ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
- __hidden_ver1 (__strncpy_ia32, __GI_strncpy, __strncpy_ia32);
-#endif
-
-#include "string/strncpy.c"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-sse2.S b/sysdeps/i386/i686/multiarch/strncpy-sse2.S
deleted file mode 100644
index bdd99239a4..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCPY
-#define STRCPY __strncpy_sse2
-#include "strcpy-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S b/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
deleted file mode 100644
index bf82ee447d..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy-ssse3.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNCPY
-#define STRCPY __strncpy_ssse3
-#include "strcpy-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/strncpy.S b/sysdeps/i386/i686/multiarch/strncpy.S
deleted file mode 100644
index 9c257efc6e..0000000000
--- a/sysdeps/i386/i686/multiarch/strncpy.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncpy
- All versions must be listed in ifunc-impl-list.c. */
-#define USE_AS_STRNCPY
-#define STRCPY strncpy
-#include "strcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/strnlen-c.c b/sysdeps/i386/i686/multiarch/strnlen-c.c
deleted file mode 100644
index 351e939a93..0000000000
--- a/sysdeps/i386/i686/multiarch/strnlen-c.c
+++ /dev/null
@@ -1,10 +0,0 @@
-#define STRNLEN __strnlen_ia32
-#ifdef SHARED
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- __hidden_ver1 (__strnlen_ia32, __GI_strnlen, __strnlen_ia32); \
- strong_alias (__strnlen_ia32, __strnlen_ia32_1); \
- __hidden_ver1 (__strnlen_ia32_1, __GI___strnlen, __strnlen_ia32_1);
-#endif
-
-#include "string/strnlen.c"
diff --git a/sysdeps/i386/i686/multiarch/strnlen-sse2.S b/sysdeps/i386/i686/multiarch/strnlen-sse2.S
deleted file mode 100644
index 56b6ae2a5c..0000000000
--- a/sysdeps/i386/i686/multiarch/strnlen-sse2.S
+++ /dev/null
@@ -1,3 +0,0 @@
-#define USE_AS_STRNLEN
-#define STRLEN __strnlen_sse2
-#include "strlen-sse2.S"
diff --git a/sysdeps/i386/i686/multiarch/strnlen.S b/sysdeps/i386/i686/multiarch/strnlen.S
deleted file mode 100644
index d241522c70..0000000000
--- a/sysdeps/i386/i686/multiarch/strnlen.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of strnlen
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(__strnlen)
- .type __strnlen, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__strnlen_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strnlen_sse2)
-2: ret
-END(__strnlen)
-
-weak_alias(__strnlen, strnlen)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strpbrk-c.c b/sysdeps/i386/i686/multiarch/strpbrk-c.c
deleted file mode 100644
index 5db62053b3..0000000000
--- a/sysdeps/i386/i686/multiarch/strpbrk-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __strpbrk_sse2 __strpbrk_ia32
-#include <sysdeps/x86_64/multiarch/strpbrk-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strpbrk.S b/sysdeps/i386/i686/multiarch/strpbrk.S
deleted file mode 100644
index 7201d6376f..0000000000
--- a/sysdeps/i386/i686/multiarch/strpbrk.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strpbrk
- All versions must be listed in ifunc-impl-list.c. */
-#define STRCSPN strpbrk
-#define USE_AS_STRPBRK
-#include "strcspn.S"
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
deleted file mode 100644
index 39a7c8825b..0000000000
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
+++ /dev/null
@@ -1,282 +0,0 @@
-/* strrchr with SSE2 with bsf and bsr
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 4
-# define STR1 PARMS
-# define STR2 STR1+4
-
- .text
-ENTRY (__strrchr_sse2_bsf)
-
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
-
- PUSH (%edi)
- pxor %xmm2, %xmm2
- mov %ecx, %edi
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
- /* ECX has OFFSET. */
- and $63, %ecx
- cmp $48, %ecx
- pshufd $0, %xmm1, %xmm1
- ja L(crosscashe)
-
-/* unaligned string. */
- movdqu (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %edx
- /* Check if there is a match. */
- pmovmskb %xmm0, %eax
-
- test %eax, %eax
- jnz L(unaligned_match1)
-
- test %edx, %edx
- jnz L(return_null)
-
- and $-16, %edi
- add $16, %edi
-
- PUSH (%esi)
- PUSH (%ebx)
-
- xor %ebx, %ebx
- jmp L(loop)
-
- CFI_POP (%esi)
- CFI_POP (%ebx)
-
- .p2align 4
-L(unaligned_return_value1):
- bsf %edx, %ecx
- mov $2, %edx
- shl %cl, %edx
- sub $1, %edx
- and %edx, %eax
- jz L(return_null)
- bsr %eax, %eax
- add %edi, %eax
- POP (%edi)
- ret
- CFI_PUSH (%edi)
-
- .p2align 4
-L(unaligned_match1):
- test %edx, %edx
- jnz L(unaligned_return_value1)
-
- PUSH (%esi)
- PUSH (%ebx)
-
- mov %eax, %ebx
- lea 16(%edi), %esi
- and $-16, %edi
- add $16, %edi
- jmp L(loop)
-
- CFI_POP (%esi)
- CFI_POP (%ebx)
-
- .p2align 4
- L(crosscashe):
-/* Hancle unaligned string. */
- and $15, %ecx
- and $-16, %edi
- pxor %xmm3, %xmm3
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm3, %edx
- /* Check if there is a match. */
- pmovmskb %xmm0, %eax
- /* Remove the leading bytes. */
- shr %cl, %edx
- shr %cl, %eax
-
- test %eax, %eax
- jnz L(unaligned_match)
-
- test %edx, %edx
- jnz L(return_null)
-
- add $16, %edi
-
- PUSH (%esi)
- PUSH (%ebx)
-
- xor %ebx, %ebx
- jmp L(loop)
-
- CFI_POP (%esi)
- CFI_POP (%ebx)
-
- .p2align 4
-L(unaligned_return_value):
- add %ecx, %edi
- bsf %edx, %ecx
- mov $2, %edx
- shl %cl, %edx
- sub $1, %edx
- and %edx, %eax
- jz L(return_null)
- bsr %eax, %eax
- add %edi, %eax
- POP (%edi)
- ret
- CFI_PUSH (%edi)
-
- .p2align 4
-L(unaligned_match):
- test %edx, %edx
- jnz L(unaligned_return_value)
-
- PUSH (%esi)
- PUSH (%ebx)
-
- mov %eax, %ebx
- add $16, %edi
- lea (%edi, %ecx), %esi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jz L(loop)
-
-L(matches):
- test %eax, %eax
- jnz L(match)
-L(return_value):
- test %ebx, %ebx
- jz L(return_null_1)
- bsr %ebx, %eax
- add %esi, %eax
-
- POP (%ebx)
- POP (%esi)
-
- sub $16, %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %ecx
- test %ecx, %ecx
- jnz L(return_value_1)
- mov %eax, %ebx
- mov %edi, %esi
- jmp L(loop)
-
- .p2align 4
-L(return_value_1):
- bsf %ecx, %ecx
- mov $2, %edx
- shl %cl, %edx
- sub $1, %edx
- and %edx, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
-
- bsr %eax, %eax
- add %edi, %eax
- sub $16, %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
-/* Return NULL. */
- .p2align 4
-L(return_null):
- xor %eax, %eax
- POP (%edi)
- ret
-
- CFI_PUSH (%edi)
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-/* Return NULL. */
- .p2align 4
-L(return_null_1):
- POP (%ebx)
- POP (%esi)
- POP (%edi)
- xor %eax, %eax
- ret
-
-END (__strrchr_sse2_bsf)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/sysdeps/i386/i686/multiarch/strrchr-sse2.S
deleted file mode 100644
index 20934288be..0000000000
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2.S
+++ /dev/null
@@ -1,708 +0,0 @@
-/* strrchr SSE2 without bsf and bsr
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 8
-# define ENTRANCE PUSH(%edi);
-# define RETURN POP(%edi); ret; CFI_PUSH(%edi);
-
-# define STR1 PARMS
-# define STR2 STR1+4
-
- atom_text_section
-ENTRY (__strrchr_sse2)
-
- ENTRANCE
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
-
- pxor %xmm2, %xmm2
- mov %ecx, %edi
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
- /* ECX has OFFSET. */
- and $63, %ecx
- cmp $48, %ecx
- pshufd $0, %xmm1, %xmm1
- ja L(crosscache)
-
-/* unaligned string. */
- movdqu (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %ecx
- /* Check if there is a match. */
- pmovmskb %xmm0, %eax
- add $16, %edi
-
- test %eax, %eax
- jnz L(unaligned_match1)
-
- test %ecx, %ecx
- jnz L(return_null)
-
- and $-16, %edi
-
- PUSH (%esi)
- PUSH (%ebx)
-
- xor %ebx, %ebx
- jmp L(loop)
-
- CFI_POP (%esi)
- CFI_POP (%ebx)
-
- .p2align 4
-L(unaligned_match1):
- test %ecx, %ecx
- jnz L(prolog_find_zero_1)
-
- PUSH (%esi)
- PUSH (%ebx)
-
- mov %eax, %ebx
- mov %edi, %esi
- and $-16, %edi
- jmp L(loop)
-
- CFI_POP (%esi)
- CFI_POP (%ebx)
-
- .p2align 4
-L(crosscache):
-/* Handle unaligned string. */
- and $15, %ecx
- and $-16, %edi
- pxor %xmm3, %xmm3
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm3, %edx
- /* Check if there is a match. */
- pmovmskb %xmm0, %eax
- /* Remove the leading bytes. */
- shr %cl, %edx
- shr %cl, %eax
- add $16, %edi
-
- test %eax, %eax
- jnz L(unaligned_match)
-
- test %edx, %edx
- jnz L(return_null)
-
- PUSH (%esi)
- PUSH (%ebx)
-
- xor %ebx, %ebx
- jmp L(loop)
-
- CFI_POP (%esi)
- CFI_POP (%ebx)
-
- .p2align 4
-L(unaligned_match):
- test %edx, %edx
- jnz L(prolog_find_zero)
-
- PUSH (%esi)
- PUSH (%ebx)
-
- mov %eax, %ebx
- lea (%edi, %ecx), %esi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %edi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jz L(loop)
-
-L(matches):
- test %eax, %eax
- jnz L(match)
-L(return_value):
- test %ebx, %ebx
- jz L(return_null_1)
- mov %ebx, %eax
- mov %esi, %edi
-
- POP (%ebx)
- POP (%esi)
-
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(return_null_1):
- POP (%ebx)
- POP (%esi)
-
- xor %eax, %eax
- RETURN
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %ecx
- test %ecx, %ecx
- jnz L(find_zero)
- mov %eax, %ebx
- mov %edi, %esi
- jmp L(loop)
-
- .p2align 4
-L(find_zero):
- test %cl, %cl
- jz L(find_zero_high)
- mov %cl, %dl
- and $15, %dl
- jz L(find_zero_8)
- test $0x01, %cl
- jnz L(FindZeroExit1)
- test $0x02, %cl
- jnz L(FindZeroExit2)
- test $0x04, %cl
- jnz L(FindZeroExit3)
- and $1 << 4 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(find_zero_8):
- test $0x10, %cl
- jnz L(FindZeroExit5)
- test $0x20, %cl
- jnz L(FindZeroExit6)
- test $0x40, %cl
- jnz L(FindZeroExit7)
- and $1 << 8 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(find_zero_high):
- mov %ch, %dh
- and $15, %dh
- jz L(find_zero_high_8)
- test $0x01, %ch
- jnz L(FindZeroExit9)
- test $0x02, %ch
- jnz L(FindZeroExit10)
- test $0x04, %ch
- jnz L(FindZeroExit11)
- and $1 << 12 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(find_zero_high_8):
- test $0x10, %ch
- jnz L(FindZeroExit13)
- test $0x20, %ch
- jnz L(FindZeroExit14)
- test $0x40, %ch
- jnz L(FindZeroExit15)
- and $1 << 16 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit1):
- and $1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit2):
- and $1 << 2 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit3):
- and $1 << 3 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit5):
- and $1 << 5 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit6):
- and $1 << 6 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit7):
- and $1 << 7 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit9):
- and $1 << 9 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit10):
- and $1 << 10 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit11):
- and $1 << 11 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit13):
- and $1 << 13 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit14):
- and $1 << 14 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
- jmp L(match_exit)
-
- CFI_PUSH (%ebx)
- CFI_PUSH (%esi)
-
- .p2align 4
-L(FindZeroExit15):
- and $1 << 15 - 1, %eax
- jz L(return_value)
-
- POP (%ebx)
- POP (%esi)
-
- .p2align 4
-L(match_exit):
- test %ah, %ah
- jnz L(match_exit_high)
- mov %al, %dl
- and $15 << 4, %dl
- jnz L(match_exit_8)
- test $0x08, %al
- jnz L(Exit4)
- test $0x04, %al
- jnz L(Exit3)
- test $0x02, %al
- jnz L(Exit2)
- lea -16(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_exit_8):
- test $0x80, %al
- jnz L(Exit8)
- test $0x40, %al
- jnz L(Exit7)
- test $0x20, %al
- jnz L(Exit6)
- lea -12(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_exit_high):
- mov %ah, %dh
- and $15 << 4, %dh
- jnz L(match_exit_high_8)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x02, %ah
- jnz L(Exit10)
- lea -8(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_exit_high_8):
- test $0x80, %ah
- jnz L(Exit16)
- test $0x40, %ah
- jnz L(Exit15)
- test $0x20, %ah
- jnz L(Exit14)
- lea -4(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit2):
- lea -15(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit3):
- lea -14(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit4):
- lea -13(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit6):
- lea -11(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit7):
- lea -10(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit8):
- lea -9(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit10):
- lea -7(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit11):
- lea -6(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit12):
- lea -5(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit14):
- lea -3(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit15):
- lea -2(%edi), %eax
- RETURN
-
- .p2align 4
-L(Exit16):
- lea -1(%edi), %eax
- RETURN
-
-/* Return NULL. */
- .p2align 4
-L(return_null):
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero):
- add %ecx, %edi
- mov %edx, %ecx
-L(prolog_find_zero_1):
- test %cl, %cl
- jz L(prolog_find_zero_high)
- mov %cl, %dl
- and $15, %dl
- jz L(prolog_find_zero_8)
- test $0x01, %cl
- jnz L(PrologFindZeroExit1)
- test $0x02, %cl
- jnz L(PrologFindZeroExit2)
- test $0x04, %cl
- jnz L(PrologFindZeroExit3)
- and $1 << 4 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero_8):
- test $0x10, %cl
- jnz L(PrologFindZeroExit5)
- test $0x20, %cl
- jnz L(PrologFindZeroExit6)
- test $0x40, %cl
- jnz L(PrologFindZeroExit7)
- and $1 << 8 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero_high):
- mov %ch, %dh
- and $15, %dh
- jz L(prolog_find_zero_high_8)
- test $0x01, %ch
- jnz L(PrologFindZeroExit9)
- test $0x02, %ch
- jnz L(PrologFindZeroExit10)
- test $0x04, %ch
- jnz L(PrologFindZeroExit11)
- and $1 << 12 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero_high_8):
- test $0x10, %ch
- jnz L(PrologFindZeroExit13)
- test $0x20, %ch
- jnz L(PrologFindZeroExit14)
- test $0x40, %ch
- jnz L(PrologFindZeroExit15)
- and $1 << 16 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit1):
- and $1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit2):
- and $1 << 2 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit3):
- and $1 << 3 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit5):
- and $1 << 5 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit6):
- and $1 << 6 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit7):
- and $1 << 7 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit9):
- and $1 << 9 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit10):
- and $1 << 10 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit11):
- and $1 << 11 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit13):
- and $1 << 13 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit14):
- and $1 << 14 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(PrologFindZeroExit15):
- and $1 << 15 - 1, %eax
- jnz L(match_exit)
- xor %eax, %eax
- RETURN
-
-END (__strrchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S
deleted file mode 100644
index d9281eaeae..0000000000
--- a/sysdeps/i386/i686/multiarch/strrchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strrchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(strrchr)
- .type strrchr, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__strrchr_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strrchr_sse2_bsf)
- HAS_ARCH_FEATURE (Slow_BSF)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strrchr_sse2)
-2: ret
-END(strrchr)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strrchr_ia32, @function; \
- .globl __strrchr_ia32; \
- .p2align 4; \
- __strrchr_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strrchr; __GI_strrchr = __strrchr_ia32
-#endif
-
-#include "../../strrchr.S"
diff --git a/sysdeps/i386/i686/multiarch/strspn-c.c b/sysdeps/i386/i686/multiarch/strspn-c.c
deleted file mode 100644
index bea09dea71..0000000000
--- a/sysdeps/i386/i686/multiarch/strspn-c.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define __strspn_sse2 __strspn_ia32
-#include <sysdeps/x86_64/multiarch/strspn-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strspn.S b/sysdeps/i386/i686/multiarch/strspn.S
deleted file mode 100644
index 1269062381..0000000000
--- a/sysdeps/i386/i686/multiarch/strspn.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of strspn
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(strspn)
- .type strspn, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__strspn_ia32)
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__strspn_sse42)
-2: ret
-END(strspn)
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strspn_ia32, @function; \
- .globl __strspn_ia32; \
- .p2align 4; \
-__strspn_ia32: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strspn_ia32, .-__strspn_ia32
-# undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
- they will be called without setting up EBX needed for PLT which is
- used by IFUNC. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strspn; __GI_strspn = __strspn_ia32
-#endif
-
-#include "../../strspn.S"
diff --git a/sysdeps/i386/i686/multiarch/test-multiarch.c b/sysdeps/i386/i686/multiarch/test-multiarch.c
deleted file mode 100644
index 593cfec273..0000000000
--- a/sysdeps/i386/i686/multiarch/test-multiarch.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/test-multiarch.c>
diff --git a/sysdeps/i386/i686/multiarch/varshift.c b/sysdeps/i386/i686/multiarch/varshift.c
deleted file mode 100644
index 7760b966e2..0000000000
--- a/sysdeps/i386/i686/multiarch/varshift.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/varshift.c>
diff --git a/sysdeps/i386/i686/multiarch/varshift.h b/sysdeps/i386/i686/multiarch/varshift.h
deleted file mode 100644
index 7c72c70d67..0000000000
--- a/sysdeps/i386/i686/multiarch/varshift.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/x86_64/multiarch/varshift.h>
diff --git a/sysdeps/i386/i686/multiarch/wcschr-c.c b/sysdeps/i386/i686/multiarch/wcschr-c.c
deleted file mode 100644
index 38d41d04de..0000000000
--- a/sysdeps/i386/i686/multiarch/wcschr-c.c
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <wchar.h>
-
-#if IS_IN (libc)
-# undef libc_hidden_weak
-# define libc_hidden_weak(name)
-
-# undef weak_alias
-# define weak_alias(name,alias)
-
-# ifdef SHARED
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- __hidden_ver1 (__wcschr_ia32, __GI_wcschr, __wcschr_ia32); \
- strong_alias (__wcschr_ia32, __wcschr_ia32_1); \
- __hidden_ver1 (__wcschr_ia32_1, __GI___wcschr, __wcschr_ia32_1);
-# endif
-#endif
-
-extern __typeof (wcschr) __wcschr_ia32;
-
-#define WCSCHR __wcschr_ia32
-#include <wcsmbs/wcschr.c>
diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
deleted file mode 100644
index 9ff6c3b8d6..0000000000
--- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S
+++ /dev/null
@@ -1,219 +0,0 @@
-/* wcschr with SSE2, without using bsf instructions
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 4
-# define STR1 PARMS
-# define STR2 STR1+4
-
- atom_text_section
-ENTRY (__wcschr_sse2)
-
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
-
- mov %ecx, %eax
- punpckldq %xmm1, %xmm1
- pxor %xmm2, %xmm2
- punpckldq %xmm1, %xmm1
-
- and $63, %eax
- cmp $48, %eax
- ja L(cross_cache)
-
- movdqu (%ecx), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
- and $-16, %ecx
- jmp L(loop)
-
- .p2align 4
-L(cross_cache):
- PUSH (%edi)
- mov %ecx, %edi
- mov %eax, %ecx
- and $-16, %edi
- and $15, %ecx
- movdqa (%edi), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
-
- sarl %cl, %edx
- sarl %cl, %eax
- test %eax, %eax
- jz L(unaligned_no_match)
-
- add %edi, %ecx
- POP (%edi)
-
- test %edx, %edx
- jz L(match_case1)
- test %al, %al
- jz L(match_higth_case2)
- test $15, %al
- jnz L(match_case2_4)
- test $15, %dl
- jnz L(return_null)
- lea 4(%ecx), %eax
- ret
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(unaligned_no_match):
- mov %edi, %ecx
- POP (%edi)
-
- test %edx, %edx
- jnz L(return_null)
-
- pxor %xmm2, %xmm2
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- add $16, %ecx
- movdqa (%ecx), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
- add $16, %ecx
-
- movdqa (%ecx), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
- add $16, %ecx
-
- movdqa (%ecx), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jnz L(matches)
- add $16, %ecx
-
- movdqa (%ecx), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %edx
- pmovmskb %xmm0, %eax
- or %eax, %edx
- jz L(loop)
-
- .p2align 4
-L(matches):
- pmovmskb %xmm2, %edx
- test %eax, %eax
- jz L(return_null)
- test %edx, %edx
- jz L(match_case1)
-
- .p2align 4
-L(match_case2):
- test %al, %al
- jz L(match_higth_case2)
- test $15, %al
- jnz L(match_case2_4)
- test $15, %dl
- jnz L(return_null)
- lea 4(%ecx), %eax
- ret
-
- .p2align 4
-L(match_case2_4):
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(match_higth_case2):
- test %dl, %dl
- jnz L(return_null)
- test $15, %ah
- jnz L(match_case2_12)
- test $15, %dh
- jnz L(return_null)
- lea 12(%ecx), %eax
- ret
-
- .p2align 4
-L(match_case2_12):
- lea 8(%ecx), %eax
- ret
-
- .p2align 4
-L(match_case1):
- test %al, %al
- jz L(match_higth_case1)
-
- test $0x01, %al
- jnz L(exit0)
- lea 4(%ecx), %eax
- ret
-
- .p2align 4
-L(match_higth_case1):
- test $0x01, %ah
- jnz L(exit3)
- lea 12(%ecx), %eax
- ret
-
- .p2align 4
-L(exit0):
- mov %ecx, %eax
- ret
-
- .p2align 4
-L(exit3):
- lea 8(%ecx), %eax
- ret
-
- .p2align 4
-L(return_null):
- xor %eax, %eax
- ret
-
-END (__wcschr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcschr.S b/sysdeps/i386/i686/multiarch/wcschr.S
deleted file mode 100644
index d3c65a6436..0000000000
--- a/sysdeps/i386/i686/multiarch/wcschr.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of wcschr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(__wcschr)
- .type wcschr, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__wcschr_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__wcschr_sse2)
-2: ret
-END(__wcschr)
-weak_alias (__wcschr, wcschr)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-c.c b/sysdeps/i386/i686/multiarch/wcscmp-c.c
deleted file mode 100644
index e3337d77e2..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscmp-c.c
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <wchar.h>
-
-#define WCSCMP __wcscmp_ia32
-#ifdef SHARED
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
- __hidden_ver1 (__wcscmp_ia32, __GI___wcscmp, __wcscmp_ia32);
-#endif
-#undef weak_alias
-#define weak_alias(name, alias)
-
-extern __typeof (wcscmp) __wcscmp_ia32;
-
-#include "wcsmbs/wcscmp.c"
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
deleted file mode 100644
index a464b58204..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
+++ /dev/null
@@ -1,1018 +0,0 @@
-/* wcscmp with SSE2
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define ENTRANCE PUSH(%esi); PUSH(%edi)
-# define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
-# define PARMS 4
-# define STR1 PARMS
-# define STR2 STR1+4
-
-/* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */
-
- .text
-ENTRY (__wcscmp_sse2)
-/*
- * This implementation uses SSE to compare up to 16 bytes at a time.
-*/
- mov STR1(%esp), %edx
- mov STR2(%esp), %eax
-
- mov (%eax), %ecx
- cmp %ecx, (%edx)
- jne L(neq)
- test %ecx, %ecx
- jz L(eq)
-
- mov 4(%eax), %ecx
- cmp %ecx, 4(%edx)
- jne L(neq)
- test %ecx, %ecx
- jz L(eq)
-
- mov 8(%eax), %ecx
- cmp %ecx, 8(%edx)
- jne L(neq)
- test %ecx, %ecx
- jz L(eq)
-
- mov 12(%eax), %ecx
- cmp %ecx, 12(%edx)
- jne L(neq)
- test %ecx, %ecx
- jz L(eq)
-
- ENTRANCE
- add $16, %eax
- add $16, %edx
-
- mov %eax, %esi
- mov %edx, %edi
- pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
- mov %al, %ch
- mov %dl, %cl
- and $63, %eax /* esi alignment in cache line */
- and $63, %edx /* edi alignment in cache line */
- and $15, %cl
- jz L(continue_00)
- cmp $16, %edx
- jb L(continue_0)
- cmp $32, %edx
- jb L(continue_16)
- cmp $48, %edx
- jb L(continue_32)
-
-L(continue_48):
- and $15, %ch
- jz L(continue_48_00)
- cmp $16, %eax
- jb L(continue_0_48)
- cmp $32, %eax
- jb L(continue_16_48)
- cmp $48, %eax
- jb L(continue_32_48)
-
- .p2align 4
-L(continue_48_48):
- mov (%esi), %ecx
- cmp %ecx, (%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 4(%esi), %ecx
- cmp %ecx, 4(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 8(%esi), %ecx
- cmp %ecx, 8(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 12(%esi), %ecx
- cmp %ecx, 12(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- movdqu 16(%edi), %xmm1
- movdqu 16(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqu 32(%edi), %xmm1
- movdqu 32(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- movdqu 48(%edi), %xmm1
- movdqu 48(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_48)
-
- add $64, %esi
- add $64, %edi
- jmp L(continue_48_48)
-
-L(continue_0):
- and $15, %ch
- jz L(continue_0_00)
- cmp $16, %eax
- jb L(continue_0_0)
- cmp $32, %eax
- jb L(continue_0_16)
- cmp $48, %eax
- jb L(continue_0_32)
-
- .p2align 4
-L(continue_0_48):
- mov (%esi), %ecx
- cmp %ecx, (%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 4(%esi), %ecx
- cmp %ecx, 4(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 8(%esi), %ecx
- cmp %ecx, 8(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 12(%esi), %ecx
- cmp %ecx, 12(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- movdqu 16(%edi), %xmm1
- movdqu 16(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqu 32(%edi), %xmm1
- movdqu 32(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- mov 48(%esi), %ecx
- cmp %ecx, 48(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 52(%esi), %ecx
- cmp %ecx, 52(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 56(%esi), %ecx
- cmp %ecx, 56(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 60(%esi), %ecx
- cmp %ecx, 60(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- add $64, %esi
- add $64, %edi
- jmp L(continue_0_48)
-
- .p2align 4
-L(continue_00):
- and $15, %ch
- jz L(continue_00_00)
- cmp $16, %eax
- jb L(continue_00_0)
- cmp $32, %eax
- jb L(continue_00_16)
- cmp $48, %eax
- jb L(continue_00_32)
-
- .p2align 4
-L(continue_00_48):
- pcmpeqd (%edi), %xmm0
- mov (%edi), %eax
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- jnz L(less4_double_words1)
-
- cmp (%esi), %eax
- jne L(nequal)
-
- mov 4(%edi), %eax
- cmp 4(%esi), %eax
- jne L(nequal)
-
- mov 8(%edi), %eax
- cmp 8(%esi), %eax
- jne L(nequal)
-
- mov 12(%edi), %eax
- cmp 12(%esi), %eax
- jne L(nequal)
-
- movdqu 16(%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqu 32(%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- movdqu 48(%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_48)
-
- add $64, %esi
- add $64, %edi
- jmp L(continue_00_48)
-
- .p2align 4
-L(continue_32):
- and $15, %ch
- jz L(continue_32_00)
- cmp $16, %eax
- jb L(continue_0_32)
- cmp $32, %eax
- jb L(continue_16_32)
- cmp $48, %eax
- jb L(continue_32_32)
-
- .p2align 4
-L(continue_32_48):
- mov (%esi), %ecx
- cmp %ecx, (%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 4(%esi), %ecx
- cmp %ecx, 4(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 8(%esi), %ecx
- cmp %ecx, 8(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 12(%esi), %ecx
- cmp %ecx, 12(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 16(%esi), %ecx
- cmp %ecx, 16(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 20(%esi), %ecx
- cmp %ecx, 20(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 24(%esi), %ecx
- cmp %ecx, 24(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 28(%esi), %ecx
- cmp %ecx, 28(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- movdqu 32(%edi), %xmm1
- movdqu 32(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- movdqu 48(%edi), %xmm1
- movdqu 48(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results */
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_48)
-
- add $64, %esi
- add $64, %edi
- jmp L(continue_32_48)
-
- .p2align 4
-L(continue_16):
- and $15, %ch
- jz L(continue_16_00)
- cmp $16, %eax
- jb L(continue_0_16)
- cmp $32, %eax
- jb L(continue_16_16)
- cmp $48, %eax
- jb L(continue_16_32)
-
- .p2align 4
-L(continue_16_48):
- mov (%esi), %ecx
- cmp %ecx, (%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 4(%esi), %ecx
- cmp %ecx, 4(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 8(%esi), %ecx
- cmp %ecx, 8(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 12(%esi), %ecx
- cmp %ecx, 12(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- movdqu 16(%edi), %xmm1
- movdqu 16(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- mov 32(%esi), %ecx
- cmp %ecx, 32(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 36(%esi), %ecx
- cmp %ecx, 36(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 40(%esi), %ecx
- cmp %ecx, 40(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 44(%esi), %ecx
- cmp %ecx, 44(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- movdqu 48(%edi), %xmm1
- movdqu 48(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_48)
-
- add $64, %esi
- add $64, %edi
- jmp L(continue_16_48)
-
- .p2align 4
-L(continue_00_00):
- movdqa (%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqa 16(%edi), %xmm3
- pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
- pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm3 /* packed sub of comparison results*/
- pmovmskb %xmm3, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqa 32(%edi), %xmm5
- pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
- pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm5 /* packed sub of comparison results*/
- pmovmskb %xmm5, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- movdqa 48(%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_48)
-
- add $64, %esi
- add $64, %edi
- jmp L(continue_00_00)
-
- .p2align 4
-L(continue_00_32):
- movdqu (%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- add $16, %esi
- add $16, %edi
- jmp L(continue_00_48)
-
- .p2align 4
-L(continue_00_16):
- movdqu (%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqu 16(%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- add $32, %esi
- add $32, %edi
- jmp L(continue_00_48)
-
- .p2align 4
-L(continue_00_0):
- movdqu (%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqu 16(%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqu 32(%esi), %xmm2
- pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
- pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
- pmovmskb %xmm2, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- add $48, %esi
- add $48, %edi
- jmp L(continue_00_48)
-
- .p2align 4
-L(continue_48_00):
- pcmpeqd (%esi), %xmm0
- mov (%edi), %eax
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- jnz L(less4_double_words1)
-
- cmp (%esi), %eax
- jne L(nequal)
-
- mov 4(%edi), %eax
- cmp 4(%esi), %eax
- jne L(nequal)
-
- mov 8(%edi), %eax
- cmp 8(%esi), %eax
- jne L(nequal)
-
- mov 12(%edi), %eax
- cmp 12(%esi), %eax
- jne L(nequal)
-
- movdqu 16(%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqu 32(%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- movdqu 48(%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_48)
-
- add $64, %esi
- add $64, %edi
- jmp L(continue_48_00)
-
- .p2align 4
-L(continue_32_00):
- movdqu (%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- add $16, %esi
- add $16, %edi
- jmp L(continue_48_00)
-
- .p2align 4
-L(continue_16_00):
- movdqu (%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqu 16(%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- add $32, %esi
- add $32, %edi
- jmp L(continue_48_00)
-
- .p2align 4
-L(continue_0_00):
- movdqu (%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqu 16(%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqu 32(%edi), %xmm1
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- add $48, %esi
- add $48, %edi
- jmp L(continue_48_00)
-
- .p2align 4
-L(continue_32_32):
- movdqu (%edi), %xmm1
- movdqu (%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- add $16, %esi
- add $16, %edi
- jmp L(continue_48_48)
-
- .p2align 4
-L(continue_16_16):
- movdqu (%edi), %xmm1
- movdqu (%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqu 16(%edi), %xmm3
- movdqu 16(%esi), %xmm4
- pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm3 /* packed sub of comparison results*/
- pmovmskb %xmm3, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- add $32, %esi
- add $32, %edi
- jmp L(continue_48_48)
-
- .p2align 4
-L(continue_0_0):
- movdqu (%edi), %xmm1
- movdqu (%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqu 16(%edi), %xmm3
- movdqu 16(%esi), %xmm4
- pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm3 /* packed sub of comparison results*/
- pmovmskb %xmm3, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- movdqu 32(%edi), %xmm1
- movdqu 32(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_32)
-
- add $48, %esi
- add $48, %edi
- jmp L(continue_48_48)
-
- .p2align 4
-L(continue_0_16):
- movdqu (%edi), %xmm1
- movdqu (%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- movdqu 16(%edi), %xmm1
- movdqu 16(%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words_16)
-
- add $32, %esi
- add $32, %edi
- jmp L(continue_32_48)
-
- .p2align 4
-L(continue_0_32):
- movdqu (%edi), %xmm1
- movdqu (%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- add $16, %esi
- add $16, %edi
- jmp L(continue_16_48)
-
- .p2align 4
-L(continue_16_32):
- movdqu (%edi), %xmm1
- movdqu (%esi), %xmm2
- pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
- pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
- pmovmskb %xmm1, %edx
- sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
- jnz L(less4_double_words)
-
- add $16, %esi
- add $16, %edi
- jmp L(continue_32_48)
-
- .p2align 4
-L(less4_double_words1):
- cmp (%esi), %eax
- jne L(nequal)
- test %eax, %eax
- jz L(equal)
-
- mov 4(%esi), %ecx
- cmp %ecx, 4(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 8(%esi), %ecx
- cmp %ecx, 8(%edi)
- jne L(nequal)
- test %ecx, %ecx
- jz L(equal)
-
- mov 12(%esi), %ecx
- cmp %ecx, 12(%edi)
- jne L(nequal)
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(less4_double_words):
- xor %eax, %eax
- test %dl, %dl
- jz L(next_two_double_words)
- and $15, %dl
- jz L(second_double_word)
- mov (%esi), %ecx
- cmp %ecx, (%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(second_double_word):
- mov 4(%esi), %ecx
- cmp %ecx, 4(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(next_two_double_words):
- and $15, %dh
- jz L(fourth_double_word)
- mov 8(%esi), %ecx
- cmp %ecx, 8(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(fourth_double_word):
- mov 12(%esi), %ecx
- cmp %ecx, 12(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(less4_double_words_16):
- xor %eax, %eax
- test %dl, %dl
- jz L(next_two_double_words_16)
- and $15, %dl
- jz L(second_double_word_16)
- mov 16(%esi), %ecx
- cmp %ecx, 16(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(second_double_word_16):
- mov 20(%esi), %ecx
- cmp %ecx, 20(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(next_two_double_words_16):
- and $15, %dh
- jz L(fourth_double_word_16)
- mov 24(%esi), %ecx
- cmp %ecx, 24(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(fourth_double_word_16):
- mov 28(%esi), %ecx
- cmp %ecx, 28(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(less4_double_words_32):
- xor %eax, %eax
- test %dl, %dl
- jz L(next_two_double_words_32)
- and $15, %dl
- jz L(second_double_word_32)
- mov 32(%esi), %ecx
- cmp %ecx, 32(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(second_double_word_32):
- mov 36(%esi), %ecx
- cmp %ecx, 36(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(next_two_double_words_32):
- and $15, %dh
- jz L(fourth_double_word_32)
- mov 40(%esi), %ecx
- cmp %ecx, 40(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(fourth_double_word_32):
- mov 44(%esi), %ecx
- cmp %ecx, 44(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(less4_double_words_48):
- xor %eax, %eax
- test %dl, %dl
- jz L(next_two_double_words_48)
- and $15, %dl
- jz L(second_double_word_48)
- mov 48(%esi), %ecx
- cmp %ecx, 48(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(second_double_word_48):
- mov 52(%esi), %ecx
- cmp %ecx, 52(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(next_two_double_words_48):
- and $15, %dh
- jz L(fourth_double_word_48)
- mov 56(%esi), %ecx
- cmp %ecx, 56(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(fourth_double_word_48):
- mov 60(%esi), %ecx
- cmp %ecx, 60(%edi)
- jne L(nequal)
- RETURN
-
- .p2align 4
-L(nequal):
- mov $1, %eax
- jg L(return)
- neg %eax
- RETURN
-
- .p2align 4
-L(return):
- RETURN
-
- .p2align 4
-L(equal):
- xorl %eax, %eax
- RETURN
-
- CFI_POP (%edi)
- CFI_POP (%esi)
-
- .p2align 4
-L(neq):
- mov $1, %eax
- jg L(neq_bigger)
- neg %eax
-
-L(neq_bigger):
- ret
-
- .p2align 4
-L(eq):
- xorl %eax, %eax
- ret
-
-END (__wcscmp_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscmp.S b/sysdeps/i386/i686/multiarch/wcscmp.S
deleted file mode 100644
index 7118bdd4db..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscmp.S
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Multiple versions of wcscmp
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc and for the
- DSO. In static binaries, we need wcscmp before the initialization
- happened. */
-#if IS_IN (libc)
- .text
-ENTRY(__wcscmp)
- .type __wcscmp, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__wcscmp_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__wcscmp_sse2)
-2: ret
-END(__wcscmp)
-weak_alias (__wcscmp, wcscmp)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-c.c b/sysdeps/i386/i686/multiarch/wcscpy-c.c
deleted file mode 100644
index fb3000392b..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscpy-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#if IS_IN (libc)
-# define wcscpy __wcscpy_ia32
-#endif
-
-#include "wcsmbs/wcscpy.c"
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
deleted file mode 100644
index 6280ba92ab..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
+++ /dev/null
@@ -1,600 +0,0 @@
-/* wcscpy with SSSE3
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 4
-# define RETURN POP (%edi); ret; CFI_PUSH (%edi)
-# define STR1 PARMS
-# define STR2 STR1+4
-# define LEN STR2+4
-
- atom_text_section
-ENTRY (__wcscpy_ssse3)
- mov STR1(%esp), %edx
- mov STR2(%esp), %ecx
-
- cmp $0, (%ecx)
- jz L(ExitTail4)
- cmp $0, 4(%ecx)
- jz L(ExitTail8)
- cmp $0, 8(%ecx)
- jz L(ExitTail12)
- cmp $0, 12(%ecx)
- jz L(ExitTail16)
-
- PUSH (%edi)
- mov %edx, %edi
- PUSH (%esi)
- lea 16(%ecx), %esi
-
- and $-16, %esi
-
- pxor %xmm0, %xmm0
- pcmpeqd (%esi), %xmm0
- movdqu (%ecx), %xmm1
- movdqu %xmm1, (%edx)
-
- pmovmskb %xmm0, %eax
- sub %ecx, %esi
-
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- mov %edx, %eax
- lea 16(%edx), %edx
- and $-16, %edx
- sub %edx, %eax
-
- sub %eax, %ecx
- mov %ecx, %eax
- and $0xf, %eax
- mov $0, %esi
-
- jz L(Align16Both)
- cmp $4, %eax
- je L(Shl4)
- cmp $8, %eax
- je L(Shl8)
- jmp L(Shl12)
-
-L(Align16Both):
- movaps (%ecx), %xmm1
- movaps 16(%ecx), %xmm2
- movaps %xmm1, (%edx)
- pcmpeqd %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm3
- movaps %xmm2, (%edx, %esi)
- pcmpeqd %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm4
- movaps %xmm3, (%edx, %esi)
- pcmpeqd %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm1
- movaps %xmm4, (%edx, %esi)
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm2
- movaps %xmm1, (%edx, %esi)
- pcmpeqd %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps 16(%ecx, %esi), %xmm3
- movaps %xmm2, (%edx, %esi)
- pcmpeqd %xmm3, %xmm0
- pmovmskb %xmm0, %eax
- lea 16(%esi), %esi
-
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- movaps %xmm3, (%edx, %esi)
- mov %ecx, %eax
- lea 16(%ecx, %esi), %ecx
- and $-0x40, %ecx
- sub %ecx, %eax
- sub %eax, %edx
-
- mov $-0x40, %esi
-
-L(Aligned64Loop):
- movaps (%ecx), %xmm2
- movaps 32(%ecx), %xmm3
- movaps %xmm2, %xmm4
- movaps 16(%ecx), %xmm5
- movaps %xmm3, %xmm6
- movaps 48(%ecx), %xmm7
- pminub %xmm5, %xmm2
- pminub %xmm7, %xmm3
- pminub %xmm2, %xmm3
- lea 64(%edx), %edx
- pcmpeqd %xmm0, %xmm3
- lea 64(%ecx), %ecx
- pmovmskb %xmm3, %eax
-
- test %eax, %eax
- jnz L(Aligned64Leave)
- movaps %xmm4, -64(%edx)
- movaps %xmm5, -48(%edx)
- movaps %xmm6, -32(%edx)
- movaps %xmm7, -16(%edx)
- jmp L(Aligned64Loop)
-
-L(Aligned64Leave):
- pcmpeqd %xmm4, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqd %xmm5, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm4, -64(%edx)
- test %eax, %eax
- lea 16(%esi), %esi
- jnz L(CopyFrom1To16Bytes)
-
- pcmpeqd %xmm6, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm5, -48(%edx)
- test %eax, %eax
- lea 16(%esi), %esi
- jnz L(CopyFrom1To16Bytes)
-
- movaps %xmm6, -32(%edx)
- pcmpeqd %xmm7, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- lea 16(%esi), %esi
- jnz L(CopyFrom1To16Bytes)
-
- mov $-0x40, %esi
- movaps %xmm7, -16(%edx)
- jmp L(Aligned64Loop)
-
- .p2align 4
-L(Shl4):
- movaps -4(%ecx), %xmm1
- movaps 12(%ecx), %xmm2
-L(Shl4Start):
- pcmpeqd %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 28(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm1
-
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- movaps 28(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 28(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-
- test %eax, %eax
- jnz L(Shl4LoopExit)
-
- palignr $4, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 28(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -12(%ecx), %ecx
- sub %eax, %edx
-
- movaps -4(%ecx), %xmm1
-
-L(Shl4LoopStart):
- movaps 12(%ecx), %xmm2
- movaps 28(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 44(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 60(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqd %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $4, %xmm4, %xmm5
- test %eax, %eax
- palignr $4, %xmm3, %xmm4
- jnz L(Shl4Start)
-
- palignr $4, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $4, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl4LoopStart)
-
-L(Shl4LoopExit):
- movlpd (%ecx), %xmm0
- movl 8(%ecx), %esi
- movlpd %xmm0, (%edx)
- movl %esi, 8(%edx)
- POP (%esi)
- add $12, %edx
- add $12, %ecx
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit4)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edi, %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(Shl8):
- movaps -8(%ecx), %xmm1
- movaps 8(%ecx), %xmm2
-L(Shl8Start):
- pcmpeqd %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-
- test %eax, %eax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 24(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm1
-
- test %eax, %eax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- movaps 24(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-
- test %eax, %eax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 24(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-
- test %eax, %eax
- jnz L(Shl8LoopExit)
-
- palignr $8, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 24(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -8(%ecx), %ecx
- sub %eax, %edx
-
- movaps -8(%ecx), %xmm1
-
-L(Shl8LoopStart):
- movaps 8(%ecx), %xmm2
- movaps 24(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 40(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 56(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqd %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $8, %xmm4, %xmm5
- test %eax, %eax
- palignr $8, %xmm3, %xmm4
- jnz L(Shl8Start)
-
- palignr $8, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $8, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl8LoopStart)
-
-L(Shl8LoopExit):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- POP (%esi)
- add $8, %edx
- add $8, %ecx
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit4)
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edi, %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(Shl12):
- movaps -12(%ecx), %xmm1
- movaps 4(%ecx), %xmm2
-L(Shl12Start):
- pcmpeqd %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- movaps %xmm2, %xmm3
-
- test %eax, %eax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 20(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm1
-
- test %eax, %eax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- movaps 20(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
-
- test %eax, %eax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps 20(%ecx), %xmm2
-
- pcmpeqd %xmm2, %xmm0
- lea 16(%edx), %edx
- pmovmskb %xmm0, %eax
- lea 16(%ecx), %ecx
-
- test %eax, %eax
- jnz L(Shl12LoopExit)
-
- palignr $12, %xmm3, %xmm2
- movaps %xmm2, (%edx)
- lea 20(%ecx), %ecx
- lea 16(%edx), %edx
-
- mov %ecx, %eax
- and $-0x40, %ecx
- sub %ecx, %eax
- lea -4(%ecx), %ecx
- sub %eax, %edx
-
- movaps -12(%ecx), %xmm1
-
-L(Shl12LoopStart):
- movaps 4(%ecx), %xmm2
- movaps 20(%ecx), %xmm3
- movaps %xmm3, %xmm6
- movaps 36(%ecx), %xmm4
- movaps %xmm4, %xmm7
- movaps 52(%ecx), %xmm5
- pminub %xmm2, %xmm6
- pminub %xmm5, %xmm7
- pminub %xmm6, %xmm7
- pcmpeqd %xmm0, %xmm7
- pmovmskb %xmm7, %eax
- movaps %xmm5, %xmm7
- palignr $12, %xmm4, %xmm5
- test %eax, %eax
- palignr $12, %xmm3, %xmm4
- jnz L(Shl12Start)
-
- palignr $12, %xmm2, %xmm3
- lea 64(%ecx), %ecx
- palignr $12, %xmm1, %xmm2
- movaps %xmm7, %xmm1
- movaps %xmm5, 48(%edx)
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- jmp L(Shl12LoopStart)
-
-L(Shl12LoopExit):
- movl (%ecx), %esi
- movl %esi, (%edx)
- mov $4, %esi
-
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %esi, %edx
- add %esi, %ecx
-
- POP (%esi)
- test %al, %al
- jz L(ExitHigh)
- test $0x01, %al
- jnz L(Exit4)
-L(Exit8):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edi, %eax
- RETURN
-
- .p2align 4
-L(ExitHigh):
- test $0x01, %ah
- jnz L(Exit12)
-L(Exit16):
- movdqu (%ecx), %xmm0
- movdqu %xmm0, (%edx)
- movl %edi, %eax
- RETURN
-
- .p2align 4
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl %edi, %eax
- RETURN
-
- .p2align 4
-L(Exit12):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
- movl %edi, %eax
- RETURN
-
-CFI_POP (%edi)
-
- .p2align 4
-L(ExitTail4):
- movl (%ecx), %eax
- movl %eax, (%edx)
- movl %edx, %eax
- ret
-
- .p2align 4
-L(ExitTail8):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl %edx, %eax
- ret
-
- .p2align 4
-L(ExitTail12):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
- movl %edx, %eax
- ret
-
- .p2align 4
-L(ExitTail16):
- movdqu (%ecx), %xmm0
- movdqu %xmm0, (%edx)
- movl %edx, %eax
- ret
-
-END (__wcscpy_ssse3)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcscpy.S b/sysdeps/i386/i686/multiarch/wcscpy.S
deleted file mode 100644
index cfc97dd87c..0000000000
--- a/sysdeps/i386/i686/multiarch/wcscpy.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of wcscpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
- .text
-ENTRY(wcscpy)
- .type wcscpy, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__wcscpy_ia32)
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- LOAD_FUNC_GOT_EAX (__wcscpy_ssse3)
-2: ret
-END(wcscpy)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen-c.c b/sysdeps/i386/i686/multiarch/wcslen-c.c
deleted file mode 100644
index a335dc0f7e..0000000000
--- a/sysdeps/i386/i686/multiarch/wcslen-c.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <wchar.h>
-
-#if IS_IN (libc)
-# define WCSLEN __wcslen_ia32
-#endif
-
-extern __typeof (wcslen) __wcslen_ia32;
-
-#include "wcsmbs/wcslen.c"
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
deleted file mode 100644
index bd3fc4c79b..0000000000
--- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S
+++ /dev/null
@@ -1,193 +0,0 @@
-/* wcslen with SSE2
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-# define STR 4
-
- .text
-ENTRY (__wcslen_sse2)
- mov STR(%esp), %edx
-
- cmp $0, (%edx)
- jz L(exit_tail0)
- cmp $0, 4(%edx)
- jz L(exit_tail1)
- cmp $0, 8(%edx)
- jz L(exit_tail2)
- cmp $0, 12(%edx)
- jz L(exit_tail3)
- cmp $0, 16(%edx)
- jz L(exit_tail4)
- cmp $0, 20(%edx)
- jz L(exit_tail5)
- cmp $0, 24(%edx)
- jz L(exit_tail6)
- cmp $0, 28(%edx)
- jz L(exit_tail7)
-
- pxor %xmm0, %xmm0
-
- lea 32(%edx), %eax
- lea 16(%edx), %ecx
- and $-16, %eax
-
- pcmpeqd (%eax), %xmm0
- pmovmskb %xmm0, %edx
- pxor %xmm1, %xmm1
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqd (%eax), %xmm1
- pmovmskb %xmm1, %edx
- pxor %xmm2, %xmm2
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqd (%eax), %xmm2
- pmovmskb %xmm2, %edx
- pxor %xmm3, %xmm3
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- pcmpeqd (%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 16(%eax), %eax
- jnz L(exit)
-
- and $-0x40, %eax
-
- .p2align 4
-L(aligned_64_loop):
- movaps (%eax), %xmm0
- movaps 16(%eax), %xmm1
- movaps 32(%eax), %xmm2
- movaps 48(%eax), %xmm6
-
- pminub %xmm1, %xmm0
- pminub %xmm6, %xmm2
- pminub %xmm0, %xmm2
- pcmpeqd %xmm3, %xmm2
- pmovmskb %xmm2, %edx
- test %edx, %edx
- lea 64(%eax), %eax
- jz L(aligned_64_loop)
-
- pcmpeqd -64(%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea 48(%ecx), %ecx
- jnz L(exit)
-
- pcmpeqd %xmm1, %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea -16(%ecx), %ecx
- jnz L(exit)
-
- pcmpeqd -32(%eax), %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea -16(%ecx), %ecx
- jnz L(exit)
-
- pcmpeqd %xmm6, %xmm3
- pmovmskb %xmm3, %edx
- test %edx, %edx
- lea -16(%ecx), %ecx
- jnz L(exit)
-
- jmp L(aligned_64_loop)
-
- .p2align 4
-L(exit):
- sub %ecx, %eax
- shr $2, %eax
- test %dl, %dl
- jz L(exit_high)
-
- mov %dl, %cl
- and $15, %cl
- jz L(exit_1)
- ret
-
- .p2align 4
-L(exit_high):
- mov %dh, %ch
- and $15, %ch
- jz L(exit_3)
- add $2, %eax
- ret
-
- .p2align 4
-L(exit_1):
- add $1, %eax
- ret
-
- .p2align 4
-L(exit_3):
- add $3, %eax
- ret
-
- .p2align 4
-L(exit_tail0):
- xor %eax, %eax
- ret
-
- .p2align 4
-L(exit_tail1):
- mov $1, %eax
- ret
-
- .p2align 4
-L(exit_tail2):
- mov $2, %eax
- ret
-
- .p2align 4
-L(exit_tail3):
- mov $3, %eax
- ret
-
- .p2align 4
-L(exit_tail4):
- mov $4, %eax
- ret
-
- .p2align 4
-L(exit_tail5):
- mov $5, %eax
- ret
-
- .p2align 4
-L(exit_tail6):
- mov $6, %eax
- ret
-
- .p2align 4
-L(exit_tail7):
- mov $7, %eax
- ret
-
-END (__wcslen_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcslen.S b/sysdeps/i386/i686/multiarch/wcslen.S
deleted file mode 100644
index 6ef9b6e7b5..0000000000
--- a/sysdeps/i386/i686/multiarch/wcslen.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of wcslen
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(__wcslen)
- .type __wcslen, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__wcslen_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__wcslen_sse2)
-2: ret
-END(__wcslen)
-
-weak_alias(__wcslen, wcslen)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-c.c b/sysdeps/i386/i686/multiarch/wcsrchr-c.c
deleted file mode 100644
index 8d8a335b5b..0000000000
--- a/sysdeps/i386/i686/multiarch/wcsrchr-c.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#if IS_IN (libc)
-# define wcsrchr __wcsrchr_ia32
-#endif
-
-#include "wcsmbs/wcsrchr.c"
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
deleted file mode 100644
index 1a9b60e55e..0000000000
--- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
+++ /dev/null
@@ -1,354 +0,0 @@
-/* wcsrchr with SSE2, without using bsf instructions.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-# include <sysdep.h>
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-# define PARMS 8
-# define ENTRANCE PUSH (%edi);
-# define RETURN POP (%edi); ret; CFI_PUSH (%edi);
-# define STR1 PARMS
-# define STR2 STR1+4
-
- atom_text_section
-ENTRY (__wcsrchr_sse2)
-
- ENTRANCE
- mov STR1(%esp), %ecx
- movd STR2(%esp), %xmm1
-
- mov %ecx, %edi
- punpckldq %xmm1, %xmm1
- pxor %xmm2, %xmm2
- punpckldq %xmm1, %xmm1
-
-/* ECX has OFFSET. */
- and $63, %ecx
- cmp $48, %ecx
- ja L(crosscache)
-
-/* unaligned string. */
- movdqu (%edi), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
-/* Find where NULL is. */
- pmovmskb %xmm2, %ecx
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
- add $16, %edi
-
- test %eax, %eax
- jnz L(unaligned_match1)
-
- test %ecx, %ecx
- jnz L(return_null)
-
- and $-16, %edi
-
- PUSH (%esi)
-
- xor %edx, %edx
- jmp L(loop)
-
- CFI_POP (%esi)
-
- .p2align 4
-L(unaligned_match1):
- test %ecx, %ecx
- jnz L(prolog_find_zero_1)
-
- PUSH (%esi)
-
-/* Save current match */
- mov %eax, %edx
- mov %edi, %esi
- and $-16, %edi
- jmp L(loop)
-
- CFI_POP (%esi)
-
- .p2align 4
-L(crosscache):
-/* Hancle unaligned string. */
- and $15, %ecx
- and $-16, %edi
- pxor %xmm3, %xmm3
- movdqa (%edi), %xmm0
- pcmpeqd %xmm0, %xmm3
- pcmpeqd %xmm1, %xmm0
-/* Find where NULL is. */
- pmovmskb %xmm3, %edx
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
-/* Remove the leading bytes. */
- shr %cl, %edx
- shr %cl, %eax
- add $16, %edi
-
- test %eax, %eax
- jnz L(unaligned_match)
-
- test %edx, %edx
- jnz L(return_null)
-
- PUSH (%esi)
-
- xor %edx, %edx
- jmp L(loop)
-
- CFI_POP (%esi)
-
- .p2align 4
-L(unaligned_match):
- test %edx, %edx
- jnz L(prolog_find_zero)
-
- PUSH (%esi)
-
- mov %eax, %edx
- lea (%edi, %ecx), %esi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%edi), %xmm0
- pcmpeqd %xmm0, %xmm2
- add $16, %edi
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm0, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm3
- pcmpeqd %xmm3, %xmm2
- add $16, %edi
- pcmpeqd %xmm1, %xmm3
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm3, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm4
- pcmpeqd %xmm4, %xmm2
- add $16, %edi
- pcmpeqd %xmm1, %xmm4
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm4, %eax
- or %eax, %ecx
- jnz L(matches)
-
- movdqa (%edi), %xmm5
- pcmpeqd %xmm5, %xmm2
- add $16, %edi
- pcmpeqd %xmm1, %xmm5
- pmovmskb %xmm2, %ecx
- pmovmskb %xmm5, %eax
- or %eax, %ecx
- jz L(loop)
-
- .p2align 4
-L(matches):
- test %eax, %eax
- jnz L(match)
-L(return_value):
- test %edx, %edx
- jz L(return_null_1)
- mov %edx, %eax
- mov %esi, %edi
-
- POP (%esi)
-
- test %ah, %ah
- jnz L(match_third_or_fourth_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%edi), %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(return_null_1):
- POP (%esi)
-
- xor %eax, %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %ecx
- test %ecx, %ecx
- jnz L(find_zero)
-/* save match info */
- mov %eax, %edx
- mov %edi, %esi
- jmp L(loop)
-
- .p2align 4
-L(find_zero):
- test %cl, %cl
- jz L(find_zero_in_third_or_fourth_wchar)
- test $15, %cl
- jz L(find_zero_in_second_wchar)
- and $1, %eax
- jz L(return_value)
-
- POP (%esi)
-
- lea -16(%edi), %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(find_zero_in_second_wchar):
- and $1 << 5 - 1, %eax
- jz L(return_value)
-
- POP (%esi)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%edi), %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(find_zero_in_third_or_fourth_wchar):
- test $15, %ch
- jz L(find_zero_in_fourth_wchar)
- and $1 << 9 - 1, %eax
- jz L(return_value)
-
- POP (%esi)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%edi), %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(find_zero_in_fourth_wchar):
-
- POP (%esi)
-
- test %ah, %ah
- jnz L(match_third_or_fourth_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%edi), %eax
- RETURN
-
- CFI_PUSH (%esi)
-
- .p2align 4
-L(match_second_wchar):
- lea -12(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_third_or_fourth_wchar):
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- lea -8(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_third_wchar):
- lea -8(%edi), %eax
- RETURN
-
- .p2align 4
-L(match_fourth_wchar):
- lea -4(%edi), %eax
- RETURN
-
- .p2align 4
-L(return_null):
- xor %eax, %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero):
- add %ecx, %edi
- mov %edx, %ecx
-L(prolog_find_zero_1):
- test %cl, %cl
- jz L(prolog_find_zero_in_third_or_fourth_wchar)
- test $15, %cl
- jz L(prolog_find_zero_in_second_wchar)
- and $1, %eax
- jz L(return_null)
-
- lea -16(%edi), %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero_in_second_wchar):
- and $1 << 5 - 1, %eax
- jz L(return_null)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%edi), %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero_in_third_or_fourth_wchar):
- test $15, %ch
- jz L(prolog_find_zero_in_fourth_wchar)
- and $1 << 9 - 1, %eax
- jz L(return_null)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%edi), %eax
- RETURN
-
- .p2align 4
-L(prolog_find_zero_in_fourth_wchar):
- test %ah, %ah
- jnz L(match_third_or_fourth_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%edi), %eax
- RETURN
-
-END (__wcsrchr_sse2)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr.S b/sysdeps/i386/i686/multiarch/wcsrchr.S
deleted file mode 100644
index cf67333995..0000000000
--- a/sysdeps/i386/i686/multiarch/wcsrchr.S
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Multiple versions of wcsrchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if IS_IN (libc)
- .text
-ENTRY(wcsrchr)
- .type wcsrchr, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__wcsrchr_ia32)
- HAS_CPU_FEATURE (SSE2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__wcsrchr_sse2)
-2: ret
-END(wcsrchr)
-#endif
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-c.c b/sysdeps/i386/i686/multiarch/wmemcmp-c.c
deleted file mode 100644
index 75ab4b94c1..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp-c.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <wchar.h>
-
-#if IS_IN (libc)
-# define WMEMCMP __wmemcmp_ia32
-#endif
-
-extern __typeof (wmemcmp) __wmemcmp_ia32;
-
-#include "wcsmbs/wmemcmp.c"
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S b/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
deleted file mode 100644
index 1a857c7e21..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp-sse4.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_WMEMCMP 1
-#define MEMCMP __wmemcmp_sse4_2
-
-#include "memcmp-sse4.S"
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S b/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
deleted file mode 100644
index a41ef95fc1..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_WMEMCMP 1
-#define MEMCMP __wmemcmp_ssse3
-
-#include "memcmp-ssse3.S"
diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.S b/sysdeps/i386/i686/multiarch/wmemcmp.S
deleted file mode 100644
index 1b9a54a413..0000000000
--- a/sysdeps/i386/i686/multiarch/wmemcmp.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Multiple versions of wmemcmp
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2011-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-
-#if IS_IN (libc)
- .text
-ENTRY(wmemcmp)
- .type wmemcmp, @gnu_indirect_function
- LOAD_GOT_AND_RTLD_GLOBAL_RO
- LOAD_FUNC_GOT_EAX (__wmemcmp_ia32)
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- LOAD_FUNC_GOT_EAX (__wmemcmp_ssse3)
- HAS_CPU_FEATURE (SSE4_2)
- jz 2f
- LOAD_FUNC_GOT_EAX (__wmemcmp_sse4_2)
-2: ret
-END(wmemcmp)
-#endif
diff --git a/sysdeps/i386/i686/nptl/tls.h b/sysdeps/i386/i686/nptl/tls.h
deleted file mode 100644
index 5b527af9d3..0000000000
--- a/sysdeps/i386/i686/nptl/tls.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _TLS_H
-
-/* Additional definitions for <tls.h> on i686 and up. */
-
-
-/* Macros to load from and store into segment registers. We can use
- the 32-bit instructions. */
-#define TLS_GET_GS() \
- ({ int __seg; __asm ("movl %%gs, %0" : "=q" (__seg)); __seg; })
-#define TLS_SET_GS(val) \
- __asm ("movl %0, %%gs" :: "q" (val))
-
-
-/* Get the full set of definitions. */
-#include_next <tls.h>
-
-#endif /* tls.h */
diff --git a/sysdeps/i386/i686/pthread_spin_trylock.S b/sysdeps/i386/i686/pthread_spin_trylock.S
deleted file mode 100644
index ce9c94d41a..0000000000
--- a/sysdeps/i386/i686/pthread_spin_trylock.S
+++ /dev/null
@@ -1,20 +0,0 @@
-/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#define HAVE_CMOV 1
-#include <sysdeps/i386/pthread_spin_trylock.S>
diff --git a/sysdeps/i386/i686/stack-aliasing.h b/sysdeps/i386/i686/stack-aliasing.h
deleted file mode 100644
index 9b5a1b0d47..0000000000
--- a/sysdeps/i386/i686/stack-aliasing.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Define macros for stack address aliasing issues for NPTL. i686 version.
- Copyright (C) 2014-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* What is useful is to avoid the 64k aliasing problem which reliably
- happens if all stacks use sizes which are a multiple of 64k. Tell
- the stack allocator to disturb this by allocation one more page if
- necessary. */
-#define MULTI_PAGE_ALIASING 65536
diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S
deleted file mode 100644
index 1ae305912e..0000000000
--- a/sysdeps/i386/i686/strcmp.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Highly optimized version for ix86, x>=6.
- Copyright (C) 1999-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-#define PARMS 4 /* no space for saved regs */
-#define STR1 PARMS
-#define STR2 STR1+4
-
- .text
-ENTRY (strcmp)
-
- movl STR1(%esp), %ecx
- movl STR2(%esp), %edx
-
-L(oop): movb (%ecx), %al
- cmpb (%edx), %al
- jne L(neq)
- incl %ecx
- incl %edx
- testb %al, %al
- jnz L(oop)
-
- xorl %eax, %eax
- /* when strings are equal, pointers rest one beyond
- the end of the NUL terminators. */
- ret
-
-L(neq): movl $1, %eax
- movl $-1, %ecx
- cmovbl %ecx, %eax
-
- ret
-END (strcmp)
-libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/i386/i686/tst-stack-align.h b/sysdeps/i386/i686/tst-stack-align.h
deleted file mode 100644
index 51f03fe77b..0000000000
--- a/sysdeps/i386/i686/tst-stack-align.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright (C) 2003-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <stdio.h>
-#include <stdint.h>
-#ifndef __SSE__
-#include_next <tst-stack-align.h>
-#else
-#include <xmmintrin.h>
-
-#define TEST_STACK_ALIGN() \
- ({ \
- __m128 _m; \
- double _d = 12.0; \
- long double _ld = 15.0; \
- int _ret = 0; \
- printf ("__m128: %p %zu\n", &_m, __alignof (__m128)); \
- if ((((uintptr_t) &_m) & (__alignof (__m128) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("double: %g %p %zu\n", _d, &_d, __alignof (double)); \
- if ((((uintptr_t) &_d) & (__alignof (double) - 1)) != 0) \
- _ret = 1; \
- \
- printf ("ldouble: %Lg %p %zu\n", _ld, &_ld, __alignof (long double)); \
- if ((((uintptr_t) &_ld) & (__alignof (long double) - 1)) != 0) \
- _ret = 1; \
- _ret; \
- })
-#endif