summaryrefslogtreecommitdiff
path: root/sysdeps/i386/i686/multiarch/memcmp-sse4.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686/multiarch/memcmp-sse4.S')
-rw-r--r--sysdeps/i386/i686/multiarch/memcmp-sse4.S1225
1 files changed, 0 insertions, 1225 deletions
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
deleted file mode 100644
index 2aa13048b2..0000000000
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ /dev/null
@@ -1,1225 +0,0 @@
-/* memcmp with SSE4.2, wmemcmp with SSE4.2
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# ifndef MEMCMP
-# define MEMCMP __memcmp_sse4_2
-# endif
-
-/* Unwind annotation that matches a 4-byte push of REG: the CFA offset
-   grows by 4 and REG is recorded as saved at offset 0.  */
-# define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-/* Unwind annotation that matches a 4-byte pop of REG: the CFA offset
-   shrinks by 4 and REG is marked as restored.  */
-# define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-/* Push or pop REG and emit the matching unwind annotation.  */
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
-
-/* Offsets of the three stack arguments relative to %esp.  They are
-   only used at function entry, before anything has been pushed
-   (PARMS is 4, presumably to skip the return address).  */
-# define PARMS 4
-# define BLK1 PARMS
-# define BLK2 BLK1 + 4
-# define LEN BLK2 + 4
-/* Restore %ebx and return.  The trailing CFI_PUSH emits no instruction;
-   it re-establishes the "EBX pushed" unwind state after the ret,
-   presumably for the code that follows textually.  */
-# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
-
-
-
-# ifdef SHARED
-/* Table entries are stored as offsets relative to the table base B.  */
-# define JMPTBL(I, B) I - B
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. INDEX is a register that contains
- the index into the jump table. SCALE is the scale of INDEX.
- EBX is overwritten. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
-/* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
-/* Get the address of the jump table. */ \
- addl $(TABLE - .), %ebx; \
-/* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ebx,INDEX,SCALE), %ebx; \
-/* EBX now holds the absolute address of the target. Go. */ \
- jmp *%ebx
-# else
-/* Table entries are stored as absolute addresses.  */
-# define JMPTBL(I, B) I
-
-/* Branch through an entry in a jump table. TABLE is a jump table
- holding absolute addresses. INDEX is a register that contains the
- index into the jump table. SCALE is the scale of INDEX. No register
- is modified. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(,INDEX,SCALE)
-# endif
-
-
-/* Warning!
- wmemcmp has to use SIGNED comparison for elements.
- memcmp has to use UNSIGNED comparison for elements.
-*/
-
- .section .text.sse4.2,"ax",@progbits
-/* Entry point.  Loads the three stack arguments:
-     %eax = first block, %edx = second block, %ecx = length.
-   For the wmemcmp build the length is scaled by 4 (shl $2), so from
-   here on %ecx is always a byte count.
-   Dispatch: more than 64 bytes goes to L(64bytesormore); fewer than 8
-   goes to L(less8bytes); everything else advances both pointers to the
-   end of the blocks and jumps through L(table_64bytes), indexed by the
-   byte count.  */
-ENTRY (MEMCMP)
- movl BLK1(%esp), %eax
- movl BLK2(%esp), %edx
- movl LEN(%esp), %ecx
-
-# ifdef USE_AS_WMEMCMP
- shl $2, %ecx
- test %ecx, %ecx
- jz L(return0)
-# else
-/* Lengths 0 and 1 are handled separately; L(less1bytes) reuses the
-   flags of this compare.  */
- cmp $1, %ecx
- jbe L(less1bytes)
-# endif
-
-/* %xmm0 stays all-zero; it is the second operand of every ptest.  */
- pxor %xmm0, %xmm0
- cmp $64, %ecx
- ja L(64bytesormore)
- cmp $8, %ecx
-
-/* pushl does not modify flags, so the jb below still tests the
-   "cmp $8" above.  The memcmp L(less8bytes) uses %bl and pops %ebx, so
-   EBX is saved before branching; the wmemcmp L(less8bytes) does not
-   touch EBX, so it branches first.  */
-# ifndef USE_AS_WMEMCMP
- PUSH (%ebx)
- jb L(less8bytes)
-# else
- jb L(less8bytes)
- PUSH (%ebx)
-# endif
-
-/* Point %eax/%edx one past the end of the blocks; the table handlers
-   address the data with negative offsets.  */
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
-
-# ifndef USE_AS_WMEMCMP
-/* memcmp, 2 <= length < 8 (lengths 0 and 1 were filtered at entry).
-   %eax/%edx still point at the start of the blocks, %ecx is the length
-   and EBX has been pushed.  Bytes are compared one at a time through
-   %bl; bytes 0 and 1 are compared unconditionally, after that the
-   length is checked before each further byte.  */
- .p2align 4
-L(less8bytes):
- mov (%eax), %bl
- cmpb (%edx), %bl
- jne L(nonzero)
-
- mov 1(%eax), %bl
- cmpb 1(%edx), %bl
- jne L(nonzero)
-
- cmp $2, %ecx
- jz L(0bytes)
-
- mov 2(%eax), %bl
- cmpb 2(%edx), %bl
- jne L(nonzero)
-
- cmp $3, %ecx
- jz L(0bytes)
-
- mov 3(%eax), %bl
- cmpb 3(%edx), %bl
- jne L(nonzero)
-
- cmp $4, %ecx
- jz L(0bytes)
-
- mov 4(%eax), %bl
- cmpb 4(%edx), %bl
- jne L(nonzero)
-
- cmp $5, %ecx
- jz L(0bytes)
-
- mov 5(%eax), %bl
- cmpb 5(%edx), %bl
- jne L(nonzero)
-
- cmp $6, %ecx
- jz L(0bytes)
-
-/* Last possible byte (length 7): equal means the blocks match,
-   otherwise fall through with the flags of this compare.  */
- mov 6(%eax), %bl
- cmpb 6(%edx), %bl
- je L(0bytes)
-
-/* A byte differed.  The flags of the cmpb survive the pop and the mov,
-   so "ja" is an unsigned test of (first-block byte > second-block
-   byte): return 1 if above, else -1.  */
-L(nonzero):
- POP (%ebx)
- mov $1, %eax
- ja L(above)
- neg %eax
-L(above):
- ret
-/* Re-establish the "EBX pushed" unwind state after the ret.  */
- CFI_PUSH (%ebx)
-# endif
-
-/* Blocks are equal: restore EBX and return 0.  This is table entry 0
-   and, in the memcmp build, also the "equal" exit of L(less8bytes);
-   both arrive with EBX pushed.  */
- .p2align 4
-L(0bytes):
- POP (%ebx)
- xor %eax, %eax
- ret
-
-# ifdef USE_AS_WMEMCMP
-
-/* for wmemcmp, case N == 1 */
-/* The byte count is a non-zero multiple of 4 below 8, i.e. exactly one
-   4-byte element.  EBX has not been pushed on this path, so these
-   exits use a plain ret.  The comparison is signed (jg): return 0 if
-   equal, 1 if the first element is greater, else -1.  */
-
- .p2align 4
-L(less8bytes):
- mov (%eax), %ecx
- cmp (%edx), %ecx
- je L(return0)
-/* mov does not modify flags; jg still tests the cmp above.  */
- mov $1, %eax
- jg L(find_diff_bigger)
- neg %eax
- ret
-
- .p2align 4
-L(find_diff_bigger):
- ret
-
-/* Also the target for a zero length at entry.  */
- .p2align 4
-L(return0):
- xor %eax, %eax
- ret
-# endif
-
-# ifndef USE_AS_WMEMCMP
-/* memcmp, length 0 or 1.  Entered from "cmp $1, %ecx; jbe" with the
-   flags still live: "below" means the length is 0.  For length 1 the
-   result is the difference of the two zero-extended bytes.  EBX has
-   not been pushed on this path.  */
- .p2align 4
-L(less1bytes):
- jb L(0bytesend)
- movzbl (%eax), %eax
- movzbl (%edx), %edx
- sub %edx, %eax
- ret
-
- .p2align 4
-L(0bytesend):
- xor %eax, %eax
- ret
-# endif
-/* Length > 64.  Register roles inside the loop:
-     %eax/%edx  current position in the two blocks
-     %ecx       constant 64 (bytes handled per iteration)
-     %ebx       bytes remaining minus 64
-     %xmm0      all-zero
-   Each iteration XORs four 16-byte chunk pairs.  With %xmm0 zero,
-   ptest sets CF only when %xmm2 is all-zero, so "jnc" is taken as soon
-   as a chunk pair differs.  */
- .p2align 4
-L(64bytesormore):
- PUSH (%ebx)
- mov %ecx, %ebx
- mov $64, %ecx
- sub $64, %ebx
-L(64bytesormore_loop):
- movdqu (%eax), %xmm1
- movdqu (%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_16diff)
-
- movdqu 16(%eax), %xmm1
- movdqu 16(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_32diff)
-
- movdqu 32(%eax), %xmm1
- movdqu 32(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_48diff)
-
- movdqu 48(%eax), %xmm1
- movdqu 48(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(find_64diff)
-/* All 64 bytes matched: advance, and loop while at least another 64
-   bytes remain (the sub does not borrow).  */
- add %ecx, %eax
- add %ecx, %edx
- sub %ecx, %ebx
- jae L(64bytesormore_loop)
-/* %ebx is now negative; 64 + %ebx is the tail length (0..63).  Move
-   both pointers to the end of the blocks and dispatch on the tail
-   length.  */
- add %ebx, %ecx
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
-
-
-# ifdef USE_AS_WMEMCMP
-
-/* This label is needed only to fill the slots of table_64bytes whose
-   index is not a multiple of 4.  */
-L(unreal_case):
-/* no code here */
-
-# endif
-/* A 16-byte chunk of the current 64-byte group differs.  %ecx is 64 on
-   entry; each fall-through subtracts 16, so %ecx ends up as the offset
-   just past the differing chunk (16, 32, 48 or 64).  After the adds
-   %eax/%edx point just past that chunk and execution falls into
-   L(16bytes), which re-compares it dword by dword.  */
- .p2align 4
-L(find_16diff):
- sub $16, %ecx
-L(find_32diff):
- sub $16, %ecx
-L(find_48diff):
- sub $16, %ecx
-L(find_64diff):
- add %ecx, %edx
- add %ecx, %eax
-
-
-# ifndef USE_AS_WMEMCMP
-/* Handlers for the last 16, 12, 8 and 4 bytes.  %eax/%edx point one
-   past the end of the data; each label compares one dword and falls
-   through to the next.  memcmp build: both dwords are loaded (%ecx
-   from the first block, %ebx from the second) because L(find_diff)
-   inspects them byte by byte.  */
- .p2align 4
-L(16bytes):
- mov -16(%eax), %ecx
- mov -16(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(12bytes):
- mov -12(%eax), %ecx
- mov -12(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(8bytes):
- mov -8(%eax), %ecx
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(4bytes):
- mov -4(%eax), %ecx
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-/* mov keeps the flags; %eax = 0 is the result if the jne is not
-   taken.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# else
-/* wmemcmp build: L(find_diff) only needs the flags of the signed dword
-   compare, so the second operand is compared straight from memory.  */
- .p2align 4
-L(16bytes):
- mov -16(%eax), %ecx
- cmp -16(%edx), %ecx
- jne L(find_diff)
-L(12bytes):
- mov -12(%eax), %ecx
- cmp -12(%edx), %ecx
- jne L(find_diff)
-L(8bytes):
- mov -8(%eax), %ecx
- cmp -8(%edx), %ecx
- jne L(find_diff)
-L(4bytes):
- mov -4(%eax), %ecx
- cmp -4(%edx), %ecx
-/* mov keeps the flags; %eax = 0 is the result if the jne is not
-   taken.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# endif
-
-# ifndef USE_AS_WMEMCMP
-/* Tail handlers for 49, 33, 17, 13, 9 and 5 bytes (memcmp only).
-   %eax/%edx point one past the end of the blocks and %xmm0 is zero.
-   16-byte chunks are tested with pxor/ptest; on a mismatch %ebx holds
-   the negative offset of the chunk for L(less16bytes).  The rest is
-   compared as dwords, then the final byte at -1.  */
- .p2align 4
-L(49bytes):
- movdqu -49(%eax), %xmm1
- movdqu -49(%edx), %xmm2
- mov $-49, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(33bytes):
- movdqu -33(%eax), %xmm1
- movdqu -33(%edx), %xmm2
- mov $-33, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(17bytes):
- mov -17(%eax), %ecx
- mov -17(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(13bytes):
- mov -13(%eax), %ecx
- mov -13(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(9bytes):
- mov -9(%eax), %ecx
- mov -9(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(5bytes):
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-/* Final byte; L(end) consumes the flags of this byte compare.  */
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 50, 34, 18, 14, 10, 6 and 2 bytes (memcmp only).
-   %eax/%edx point one past the end of the blocks and %xmm0 is zero.
-   16-byte chunks first (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then dwords, then the last two bytes one at a
-   time.  */
- .p2align 4
-L(50bytes):
- mov $-50, %ebx
- movdqu -50(%eax), %xmm1
- movdqu -50(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(34bytes):
- mov $-34, %ebx
- movdqu -34(%eax), %xmm1
- movdqu -34(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(18bytes):
- mov -18(%eax), %ecx
- mov -18(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(14bytes):
- mov -14(%eax), %ecx
- mov -14(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(10bytes):
- mov -10(%eax), %ecx
- mov -10(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(6bytes):
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(2bytes):
-/* Low byte (%cl/%bl) is the byte at -2, high byte (%ch/%bh) the byte
-   at -1; they are compared in address order.  */
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 51, 35, 19, 15, 11, 7, 3 and 1 bytes (memcmp
-   only).  %eax/%edx point one past the end of the blocks and %xmm0 is
-   zero.  16-byte chunks first (mismatch: %ebx = negative chunk offset,
-   go to L(less16bytes)), then dwords, then the last three bytes.  */
- .p2align 4
-L(51bytes):
- mov $-51, %ebx
- movdqu -51(%eax), %xmm1
- movdqu -51(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(35bytes):
- mov $-35, %ebx
- movdqu -35(%eax), %xmm1
- movdqu -35(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(19bytes):
- movl -19(%eax), %ecx
- movl -19(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(15bytes):
- movl -15(%eax), %ecx
- movl -15(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(11bytes):
- movl -11(%eax), %ecx
- movl -11(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(7bytes):
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-L(3bytes):
-/* Bytes -3 and -2 as one word: the low byte is compared first; if it
-   matches, the 16-bit compare is decided by the byte at -2.  */
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
-L(1bytes):
-/* The pointer in %eax is no longer needed, so it is overwritten with
-   the final byte of the first block.  */
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
-/* Tail handlers for 52, 36 and 20 bytes (both builds).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks are tested with pxor/ptest (mismatch: %ebx = negative
-   chunk offset, go to L(less16bytes)), then the final dword at -4.  */
- .p2align 4
-L(52bytes):
- movdqu -52(%eax), %xmm1
- movdqu -52(%edx), %xmm2
- mov $-52, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(36bytes):
- movdqu -36(%eax), %xmm1
- movdqu -36(%edx), %xmm2
- mov $-36, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(20bytes):
- movdqu -20(%eax), %xmm1
- movdqu -20(%edx), %xmm2
- mov $-20, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
-/* mov keeps the flags; %eax = 0 is the result if the jne is not
-   taken.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
-/* Tail handlers for 53, 37 and 21 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dword at -5 and the byte at -1.  */
- .p2align 4
-L(53bytes):
- movdqu -53(%eax), %xmm1
- movdqu -53(%edx), %xmm2
- mov $-53, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(37bytes):
- mov $-37, %ebx
- movdqu -37(%eax), %xmm1
- movdqu -37(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(21bytes):
- mov $-21, %ebx
- movdqu -21(%eax), %xmm1
- movdqu -21(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 54, 38 and 22 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dword at -6 and the last two bytes in
-   address order.  */
- .p2align 4
-L(54bytes):
- movdqu -54(%eax), %xmm1
- movdqu -54(%edx), %xmm2
- mov $-54, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(38bytes):
- mov $-38, %ebx
- movdqu -38(%eax), %xmm1
- movdqu -38(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(22bytes):
- mov $-22, %ebx
- movdqu -22(%eax), %xmm1
- movdqu -22(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 55, 39 and 23 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dword at -7, the word at -3 and the byte
-   at -1.  */
- .p2align 4
-L(55bytes):
- movdqu -55(%eax), %xmm1
- movdqu -55(%edx), %xmm2
- mov $-55, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(39bytes):
- mov $-39, %ebx
- movdqu -39(%eax), %xmm1
- movdqu -39(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(23bytes):
- mov $-23, %ebx
- movdqu -23(%eax), %xmm1
- movdqu -23(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
-/* The pointer in %eax is no longer needed and is overwritten.  */
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
-/* Tail handlers for 56, 40 and 24 bytes (both builds).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -8 and -4.  */
- .p2align 4
-L(56bytes):
- movdqu -56(%eax), %xmm1
- movdqu -56(%edx), %xmm2
- mov $-56, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(40bytes):
- mov $-40, %ebx
- movdqu -40(%eax), %xmm1
- movdqu -40(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(24bytes):
- mov $-24, %ebx
- movdqu -24(%eax), %xmm1
- movdqu -24(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -8(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
-/* mov keeps the flags; %eax = 0 is the result if the jne is not
-   taken.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
-/* Tail handlers for 57, 41 and 25 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -9 and -5 and the byte at -1.  */
- .p2align 4
-L(57bytes):
- movdqu -57(%eax), %xmm1
- movdqu -57(%edx), %xmm2
- mov $-57, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(41bytes):
- mov $-41, %ebx
- movdqu -41(%eax), %xmm1
- movdqu -41(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(25bytes):
- mov $-25, %ebx
- movdqu -25(%eax), %xmm1
- movdqu -25(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -9(%eax), %ecx
- mov -9(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 58, 42 and 26 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -10 and -6 and the last two
-   bytes in address order.  */
- .p2align 4
-L(58bytes):
- movdqu -58(%eax), %xmm1
- movdqu -58(%edx), %xmm2
- mov $-58, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(42bytes):
- mov $-42, %ebx
- movdqu -42(%eax), %xmm1
- movdqu -42(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(26bytes):
- mov $-26, %ebx
- movdqu -26(%eax), %xmm1
- movdqu -26(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -10(%eax), %ecx
- mov -10(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 59, 43 and 27 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -11 and -7, the word at -3 and
-   the byte at -1.  */
- .p2align 4
-L(59bytes):
- movdqu -59(%eax), %xmm1
- movdqu -59(%edx), %xmm2
- mov $-59, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(43bytes):
- mov $-43, %ebx
- movdqu -43(%eax), %xmm1
- movdqu -43(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(27bytes):
- mov $-27, %ebx
- movdqu -27(%eax), %xmm1
- movdqu -27(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- movl -11(%eax), %ecx
- movl -11(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
-/* The pointer in %eax is no longer needed and is overwritten.  */
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
-/* Tail handlers for 60, 44 and 28 bytes (both builds).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -12, -8 and -4.  */
- .p2align 4
-L(60bytes):
- movdqu -60(%eax), %xmm1
- movdqu -60(%edx), %xmm2
- mov $-60, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(44bytes):
- mov $-44, %ebx
- movdqu -44(%eax), %xmm1
- movdqu -44(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(28bytes):
- mov $-28, %ebx
- movdqu -28(%eax), %xmm1
- movdqu -28(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -12(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -12(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -12(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -8(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
-/* mov keeps the flags; %eax = 0 is the result if the jne is not
-   taken.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
-/* Tail handlers for 61, 45 and 29 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -13, -9 and -5 and the byte
-   at -1.  */
- .p2align 4
-L(61bytes):
- movdqu -61(%eax), %xmm1
- movdqu -61(%edx), %xmm2
- mov $-61, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(45bytes):
- mov $-45, %ebx
- movdqu -45(%eax), %xmm1
- movdqu -45(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(29bytes):
- mov $-29, %ebx
- movdqu -29(%eax), %xmm1
- movdqu -29(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -13(%eax), %ecx
- mov -13(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov -9(%eax), %ecx
- mov -9(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov -5(%eax), %ecx
- mov -5(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzbl -1(%eax), %ecx
- cmp -1(%edx), %cl
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 62, 46 and 30 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -14, -10 and -6 and the last two
-   bytes in address order.  */
- .p2align 4
-L(62bytes):
- movdqu -62(%eax), %xmm1
- movdqu -62(%edx), %xmm2
- mov $-62, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(46bytes):
- mov $-46, %ebx
- movdqu -46(%eax), %xmm1
- movdqu -46(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(30bytes):
- mov $-30, %ebx
- movdqu -30(%eax), %xmm1
- movdqu -30(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -14(%eax), %ecx
- mov -14(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- mov -10(%eax), %ecx
- mov -10(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- mov -6(%eax), %ecx
- mov -6(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -2(%eax), %ecx
- movzwl -2(%edx), %ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bh, %ch
- mov $0, %eax
- jne L(end)
- RETURN
-
-/* Tail handlers for 63, 47 and 31 bytes (memcmp only).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -15, -11 and -7, the word at -3
-   and the byte at -1.  */
- .p2align 4
-L(63bytes):
- movdqu -63(%eax), %xmm1
- movdqu -63(%edx), %xmm2
- mov $-63, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(47bytes):
- mov $-47, %ebx
- movdqu -47(%eax), %xmm1
- movdqu -47(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(31bytes):
- mov $-31, %ebx
- movdqu -31(%eax), %xmm1
- movdqu -31(%edx), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- movl -15(%eax), %ecx
- movl -15(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movl -11(%eax), %ecx
- movl -11(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movl -7(%eax), %ecx
- movl -7(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
- movzwl -3(%eax), %ecx
- movzwl -3(%edx), %ebx
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
-/* The pointer in %eax is no longer needed and is overwritten.  */
- movzbl -1(%eax), %eax
- cmpb -1(%edx), %al
- mov $0, %eax
- jne L(end)
- RETURN
-# endif
-
-/* Tail handlers for 64, 48 and 32 bytes (both builds).  %eax/%edx
-   point one past the end of the blocks and %xmm0 is zero.  Three
-   16-byte chunks (mismatch: %ebx = negative chunk offset, go to
-   L(less16bytes)), then the dwords at -16, -12, -8 and -4.  */
- .p2align 4
-L(64bytes):
- movdqu -64(%eax), %xmm1
- movdqu -64(%edx), %xmm2
- mov $-64, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(48bytes):
- movdqu -48(%eax), %xmm1
- movdqu -48(%edx), %xmm2
- mov $-48, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(32bytes):
- movdqu -32(%eax), %xmm1
- movdqu -32(%edx), %xmm2
- mov $-32, %ebx
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -16(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -16(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -16(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -12(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -12(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -12(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -8(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -8(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -8(%edx), %ecx
-# endif
- jne L(find_diff)
-
- mov -4(%eax), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%edx), %ebx
- cmp %ebx, %ecx
-# else
- cmp -4(%edx), %ecx
-# endif
-/* mov keeps the flags; %eax = 0 is the result if the jne is not
-   taken.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-
-# ifndef USE_AS_WMEMCMP
-/* A 16-byte chunk tested with ptest differs.  %ebx holds the negative
-   offset of that chunk from the end pointers in %eax/%edx; adding it
-   makes both pointers address the start of the chunk.  The chunk is
-   then compared dword by dword to find the first differing dword.
-   memcmp build: %ebx is reused as scratch for the second block's dword
-   once the offset has been applied.  */
- .p2align 4
-L(less16bytes):
- add %ebx, %eax
- add %ebx, %edx
-
- mov (%eax), %ecx
- mov (%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov 4(%eax), %ecx
- mov 4(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov 8(%eax), %ecx
- mov 8(%edx), %ebx
- cmp %ebx, %ecx
- jne L(find_diff)
-
- mov 12(%eax), %ecx
- mov 12(%edx), %ebx
- cmp %ebx, %ecx
-/* mov keeps the flags of the compare above.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# else
-/* wmemcmp build of the same routine: L(find_diff) only needs the flags
-   of the signed dword compare, so the second operand is compared
-   straight from memory.  */
- .p2align 4
-L(less16bytes):
- add %ebx, %eax
- add %ebx, %edx
-
- mov (%eax), %ecx
- cmp (%edx), %ecx
- jne L(find_diff)
-
- mov 4(%eax), %ecx
- cmp 4(%edx), %ecx
- jne L(find_diff)
-
- mov 8(%eax), %ecx
- cmp 8(%edx), %ecx
- jne L(find_diff)
-
- mov 12(%eax), %ecx
- cmp 12(%edx), %ecx
-
-/* mov keeps the flags of the compare above.  */
- mov $0, %eax
- jne L(find_diff)
- RETURN
-# endif
-
-/* Common "blocks differ" exit; EBX is pushed on entry.
-   memcmp build: %ecx and %ebx hold the differing dwords of the first
-   and second block.  Bytes are examined from the lowest address
-   upwards: low byte, then the low word (decided by its second byte
-   once the low bytes match), then the same for the upper half after
-   shifting it down.  L(end) can also be entered directly with the
-   flags of a byte compare already set.  The flags survive the pop and
-   the mov, so "ja" is an unsigned test: return 1 if the first block's
-   byte is above, else -1.
-   wmemcmp build: the caller's dword compare already set the flags;
-   "jg" is a signed test: return 1 if greater, else -1.  */
- .p2align 4
-L(find_diff):
-# ifndef USE_AS_WMEMCMP
- cmpb %bl, %cl
- jne L(end)
- cmp %bx, %cx
- jne L(end)
- shr $16,%ecx
- shr $16,%ebx
- cmp %bl, %cl
- jne L(end)
- cmp %bx, %cx
-L(end):
- POP (%ebx)
- mov $1, %eax
- ja L(bigger)
- neg %eax
-L(bigger):
- ret
-# else
- POP (%ebx)
- mov $1, %eax
- jg L(bigger)
- neg %eax
- ret
-
- .p2align 4
-L(bigger):
- ret
-# endif
-END (MEMCMP)
-
-/* Jump table used by BRANCH_TO_JMPTBL_ENTRY with scale 4: entry N is
-   the handler L(Nbytes) for N bytes left to compare, N = 0..64.
-   JMPTBL stores each entry relative to the table start when SHARED is
-   defined and as an absolute address otherwise.
-   In the wmemcmp build the byte count is an element count shifted
-   left by 2, so only multiples of 4 are expected as an index; the
-   remaining slots are filled with the placeholder L(unreal_case).  */
- .section .rodata.sse4.2,"a",@progbits
- .p2align 2
- .type L(table_64bytes), @object
-# ifndef USE_AS_WMEMCMP
-L(table_64bytes):
- .int JMPTBL (L(0bytes), L(table_64bytes))
- .int JMPTBL (L(1bytes), L(table_64bytes))
- .int JMPTBL (L(2bytes), L(table_64bytes))
- .int JMPTBL (L(3bytes), L(table_64bytes))
- .int JMPTBL (L(4bytes), L(table_64bytes))
- .int JMPTBL (L(5bytes), L(table_64bytes))
- .int JMPTBL (L(6bytes), L(table_64bytes))
- .int JMPTBL (L(7bytes), L(table_64bytes))
- .int JMPTBL (L(8bytes), L(table_64bytes))
- .int JMPTBL (L(9bytes), L(table_64bytes))
- .int JMPTBL (L(10bytes), L(table_64bytes))
- .int JMPTBL (L(11bytes), L(table_64bytes))
- .int JMPTBL (L(12bytes), L(table_64bytes))
- .int JMPTBL (L(13bytes), L(table_64bytes))
- .int JMPTBL (L(14bytes), L(table_64bytes))
- .int JMPTBL (L(15bytes), L(table_64bytes))
- .int JMPTBL (L(16bytes), L(table_64bytes))
- .int JMPTBL (L(17bytes), L(table_64bytes))
- .int JMPTBL (L(18bytes), L(table_64bytes))
- .int JMPTBL (L(19bytes), L(table_64bytes))
- .int JMPTBL (L(20bytes), L(table_64bytes))
- .int JMPTBL (L(21bytes), L(table_64bytes))
- .int JMPTBL (L(22bytes), L(table_64bytes))
- .int JMPTBL (L(23bytes), L(table_64bytes))
- .int JMPTBL (L(24bytes), L(table_64bytes))
- .int JMPTBL (L(25bytes), L(table_64bytes))
- .int JMPTBL (L(26bytes), L(table_64bytes))
- .int JMPTBL (L(27bytes), L(table_64bytes))
- .int JMPTBL (L(28bytes), L(table_64bytes))
- .int JMPTBL (L(29bytes), L(table_64bytes))
- .int JMPTBL (L(30bytes), L(table_64bytes))
- .int JMPTBL (L(31bytes), L(table_64bytes))
- .int JMPTBL (L(32bytes), L(table_64bytes))
- .int JMPTBL (L(33bytes), L(table_64bytes))
- .int JMPTBL (L(34bytes), L(table_64bytes))
- .int JMPTBL (L(35bytes), L(table_64bytes))
- .int JMPTBL (L(36bytes), L(table_64bytes))
- .int JMPTBL (L(37bytes), L(table_64bytes))
- .int JMPTBL (L(38bytes), L(table_64bytes))
- .int JMPTBL (L(39bytes), L(table_64bytes))
- .int JMPTBL (L(40bytes), L(table_64bytes))
- .int JMPTBL (L(41bytes), L(table_64bytes))
- .int JMPTBL (L(42bytes), L(table_64bytes))
- .int JMPTBL (L(43bytes), L(table_64bytes))
- .int JMPTBL (L(44bytes), L(table_64bytes))
- .int JMPTBL (L(45bytes), L(table_64bytes))
- .int JMPTBL (L(46bytes), L(table_64bytes))
- .int JMPTBL (L(47bytes), L(table_64bytes))
- .int JMPTBL (L(48bytes), L(table_64bytes))
- .int JMPTBL (L(49bytes), L(table_64bytes))
- .int JMPTBL (L(50bytes), L(table_64bytes))
- .int JMPTBL (L(51bytes), L(table_64bytes))
- .int JMPTBL (L(52bytes), L(table_64bytes))
- .int JMPTBL (L(53bytes), L(table_64bytes))
- .int JMPTBL (L(54bytes), L(table_64bytes))
- .int JMPTBL (L(55bytes), L(table_64bytes))
- .int JMPTBL (L(56bytes), L(table_64bytes))
- .int JMPTBL (L(57bytes), L(table_64bytes))
- .int JMPTBL (L(58bytes), L(table_64bytes))
- .int JMPTBL (L(59bytes), L(table_64bytes))
- .int JMPTBL (L(60bytes), L(table_64bytes))
- .int JMPTBL (L(61bytes), L(table_64bytes))
- .int JMPTBL (L(62bytes), L(table_64bytes))
- .int JMPTBL (L(63bytes), L(table_64bytes))
- .int JMPTBL (L(64bytes), L(table_64bytes))
-# else
-L(table_64bytes):
- .int JMPTBL (L(0bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(4bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(8bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(12bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(16bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(20bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(24bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(28bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(32bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(36bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(40bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(44bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(48bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(52bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(56bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(60bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(64bytes), L(table_64bytes))
-# endif
-#endif