diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2015-08-20 15:20:58 -0700 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2015-08-20 16:25:22 -0700 |
commit | 6df87253b289e5d51e01f1a45353294f3dd65a6e (patch) | |
tree | 119f4dfcdd1f4e2502b184f0896b59702620fdec /sysdeps/i386/i686/multiarch/strlen-sse2.S | |
parent | 6458faf199de42a3bba3dc7cad9fb38d66bf0b4d (diff) | |
download | glibc-6df87253b289e5d51e01f1a45353294f3dd65a6e.tar.gz |
Add i386 strcat multiarch functionshjl/i486/multiarch-old
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strlen-sse2.S')
-rw-r--r-- | sysdeps/i386/i686/multiarch/strlen-sse2.S | 695 |
1 files changed, 0 insertions, 695 deletions
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S deleted file mode 100644 index 3d30714b7a..0000000000 --- a/sysdeps/i386/i686/multiarch/strlen-sse2.S +++ /dev/null @@ -1,695 +0,0 @@ -/* strlen with SSE2 - Copyright (C) 2010-2015 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* for strlen only SHARED version is optimized, for strcat, strncat, strnlen both STATIC and SHARED are optimized */ - -#if (defined USE_AS_STRNLEN || defined USE_AS_STRCAT || defined SHARED) && IS_IN (libc) - -# ifndef USE_AS_STRCAT - -# include <sysdep.h> -# define PARMS 4 -# define STR PARMS -# define RETURN ret - -# ifdef USE_AS_STRNLEN -# define LEN PARMS + 8 -# define CFI_PUSH(REG) \ - cfi_adjust_cfa_offset (4); \ - cfi_rel_offset (REG, 0) - -# define CFI_POP(REG) \ - cfi_adjust_cfa_offset (-4); \ - cfi_restore (REG) - -# define PUSH(REG) pushl REG; CFI_PUSH (REG) -# define POP(REG) popl REG; CFI_POP (REG) -# undef RETURN -# define RETURN POP (%edi); CFI_PUSH(%edi); ret -# endif - -# ifndef STRLEN -# define STRLEN __strlen_sse2 -# endif - - atom_text_section -ENTRY (STRLEN) - mov STR(%esp), %edx -# ifdef USE_AS_STRNLEN - PUSH (%edi) - movl LEN(%esp), %edi - sub $4, %edi - jbe L(len_less4_prolog) -# endif -# endif - xor %eax, %eax - cmpb $0, (%edx) - jz L(exit_tail0) - cmpb $0, 1(%edx) - jz L(exit_tail1) - cmpb $0, 2(%edx) - jz L(exit_tail2) - cmpb $0, 3(%edx) - jz L(exit_tail3) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less8_prolog) -# endif - - cmpb $0, 4(%edx) - jz L(exit_tail4) - cmpb $0, 5(%edx) - jz L(exit_tail5) - cmpb $0, 6(%edx) - jz L(exit_tail6) - cmpb $0, 7(%edx) - jz L(exit_tail7) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less12_prolog) -# endif - - cmpb $0, 8(%edx) - jz L(exit_tail8) - cmpb $0, 9(%edx) - jz L(exit_tail9) - cmpb $0, 10(%edx) - jz L(exit_tail10) - cmpb $0, 11(%edx) - jz L(exit_tail11) - -# ifdef USE_AS_STRNLEN - sub $4, %edi - jbe L(len_less16_prolog) -# endif - - cmpb $0, 12(%edx) - jz L(exit_tail12) - cmpb $0, 13(%edx) - jz L(exit_tail13) - cmpb $0, 14(%edx) - jz L(exit_tail14) - cmpb $0, 15(%edx) - jz L(exit_tail15) - - pxor %xmm0, %xmm0 - lea 16(%edx), %eax - mov %eax, %ecx - and $-16, %eax - -# ifdef USE_AS_STRNLEN - and $15, %edx - add %edx, %edi - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - pxor %xmm2, %xmm2 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - pxor %xmm3, %xmm3 - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - - pcmpeqb (%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%eax), %eax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - mov %eax, %edx - and $63, %edx - add %edx, %edi -# endif - - and $-0x40, %eax - - .p2align 4 -L(aligned_64_loop): -# ifdef USE_AS_STRNLEN - sub $64, %edi - jbe L(len_less64) -# endif - movaps (%eax), %xmm0 - movaps 16(%eax), %xmm1 - movaps 32(%eax), %xmm2 - movaps 48(%eax), %xmm6 - pminub %xmm1, %xmm0 - pminub %xmm6, %xmm2 - pminub %xmm0, %xmm2 - pcmpeqb %xmm3, %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 64(%eax), %eax - jz L(aligned_64_loop) - - pcmpeqb -64(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 48(%ecx), %ecx - jnz L(exit) - - pcmpeqb %xmm1, %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqb -32(%eax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea -16(%ecx), %ecx - jnz L(exit) - - pcmpeqb %xmm6, %xmm3 - pmovmskb %xmm3, %edx - lea -16(%ecx), %ecx -L(exit): - sub %ecx, %eax - test %dl, %dl - jz L(exit_high) - - mov %dl, %cl - and $15, %cl - jz L(exit_8) - test $0x01, %dl - jnz L(exit_tail0) - test $0x02, %dl - jnz L(exit_tail1) - test $0x04, %dl - jnz L(exit_tail2) - add $3, %eax - RETURN - - .p2align 4 -L(exit_8): - test $0x10, %dl - jnz L(exit_tail4) - test $0x20, %dl - jnz L(exit_tail5) - test $0x40, %dl - jnz L(exit_tail6) - add $7, %eax - RETURN - - .p2align 4 -L(exit_high): - mov %dh, %ch - and $15, %ch - jz L(exit_high_8) - test $0x01, %dh - jnz L(exit_tail8) - test $0x02, %dh - jnz L(exit_tail9) - test $0x04, %dh - jnz L(exit_tail10) - add $11, %eax - RETURN - - .p2align 4 -L(exit_high_8): - test $0x10, %dh - jnz L(exit_tail12) - test $0x20, %dh - jnz L(exit_tail13) - test $0x40, %dh - jnz L(exit_tail14) - add $15, %eax -L(exit_tail0): - RETURN - -# ifdef USE_AS_STRNLEN - - .p2align 4 -L(len_less64): - pxor %xmm0, %xmm0 - add $64, %edi - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe L(return_start_len) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe L(return_start_len) - - pcmpeqb (%eax), %xmm0 - pmovmskb %xmm0, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %edi - jbe L(return_start_len) - - pcmpeqb (%eax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%eax), %eax - test %edx, %edx - jnz L(strnlen_exit) - - movl LEN(%esp), %eax - RETURN - - .p2align 4 -L(strnlen_exit): - sub %ecx, %eax - - test %dl, %dl - jz L(strnlen_exit_high) - mov %dl, %cl - and $15, %cl - jz L(strnlen_exit_8) - test $0x01, %dl - jnz L(exit_tail0) - test $0x02, %dl - jnz L(strnlen_exit_tail1) - test $0x04, %dl - jnz L(strnlen_exit_tail2) - sub $4, %edi - jb L(return_start_len) - lea 3(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_8): - test $0x10, %dl - jnz L(strnlen_exit_tail4) - test $0x20, %dl - jnz L(strnlen_exit_tail5) - test $0x40, %dl - jnz L(strnlen_exit_tail6) - sub $8, %edi - jb L(return_start_len) - lea 7(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_high): - mov %dh, %ch - and $15, %ch - jz L(strnlen_exit_high_8) - test $0x01, %dh - jnz L(strnlen_exit_tail8) - test $0x02, %dh - jnz L(strnlen_exit_tail9) - test $0x04, %dh - jnz L(strnlen_exit_tail10) - sub $12, %edi - jb L(return_start_len) - lea 11(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_high_8): - test $0x10, %dh - jnz L(strnlen_exit_tail12) - test $0x20, %dh - jnz L(strnlen_exit_tail13) - test $0x40, %dh - jnz L(strnlen_exit_tail14) - sub $16, %edi - jb L(return_start_len) - lea 15(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail1): - sub $2, %edi - jb L(return_start_len) - lea 1(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail2): - sub $3, %edi - jb L(return_start_len) - lea 2(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail4): - sub $5, %edi - jb L(return_start_len) - lea 4(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail5): - sub $6, %edi - jb L(return_start_len) - lea 5(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail6): - sub $7, %edi - jb L(return_start_len) - lea 6(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail8): - sub $9, %edi - jb L(return_start_len) - lea 8(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail9): - sub $10, %edi - jb L(return_start_len) - lea 9(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail10): - sub $11, %edi - jb L(return_start_len) - lea 10(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail12): - sub $13, %edi - jb L(return_start_len) - lea 12(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail13): - sub $14, %edi - jb L(return_start_len) - lea 13(%eax), %eax - RETURN - - .p2align 4 -L(strnlen_exit_tail14): - sub $15, %edi - jb L(return_start_len) - lea 14(%eax), %eax - RETURN - - .p2align 4 -L(return_start_len): - movl LEN(%esp), %eax - RETURN - -/* for prolog only */ - - .p2align 4 -L(len_less4_prolog): - xor %eax, %eax - - add $4, %edi - jz L(exit_tail0) - - cmpb $0, (%edx) - jz L(exit_tail0) - cmp $1, %edi - je L(exit_tail1) - - cmpb $0, 1(%edx) - jz L(exit_tail1) - cmp $2, %edi - je L(exit_tail2) - - cmpb $0, 2(%edx) - jz L(exit_tail2) - cmp $3, %edi - je L(exit_tail3) - - cmpb $0, 3(%edx) - jz L(exit_tail3) - mov $4, %eax - RETURN - - .p2align 4 -L(len_less8_prolog): - add $4, %edi - - cmpb $0, 4(%edx) - jz L(exit_tail4) - cmp $1, %edi - je L(exit_tail5) - - cmpb $0, 5(%edx) - jz L(exit_tail5) - cmp $2, %edi - je L(exit_tail6) - - cmpb $0, 6(%edx) - jz L(exit_tail6) - cmp $3, %edi - je L(exit_tail7) - - cmpb $0, 7(%edx) - jz L(exit_tail7) - mov $8, %eax - RETURN - - - .p2align 4 -L(len_less12_prolog): - add $4, %edi - - cmpb $0, 8(%edx) - jz L(exit_tail8) - cmp $1, %edi - je L(exit_tail9) - - cmpb $0, 9(%edx) - jz L(exit_tail9) - cmp $2, %edi - je L(exit_tail10) - - cmpb $0, 10(%edx) - jz L(exit_tail10) - cmp $3, %edi - je L(exit_tail11) - - cmpb $0, 11(%edx) - jz L(exit_tail11) - mov $12, %eax - RETURN - - .p2align 4 -L(len_less16_prolog): - add $4, %edi - - cmpb $0, 12(%edx) - jz L(exit_tail12) - cmp $1, %edi - je L(exit_tail13) - - cmpb $0, 13(%edx) - jz L(exit_tail13) - cmp $2, %edi - je L(exit_tail14) - - cmpb $0, 14(%edx) - jz L(exit_tail14) - cmp $3, %edi - je L(exit_tail15) - - cmpb $0, 15(%edx) - jz L(exit_tail15) - mov $16, %eax - RETURN -# endif - - .p2align 4 -L(exit_tail1): - add $1, %eax - RETURN - -L(exit_tail2): - add $2, %eax - RETURN - -L(exit_tail3): - add $3, %eax - RETURN - -L(exit_tail4): - add $4, %eax - RETURN - -L(exit_tail5): - add $5, %eax - RETURN - -L(exit_tail6): - add $6, %eax - RETURN - -L(exit_tail7): - add $7, %eax - RETURN - -L(exit_tail8): - add $8, %eax - RETURN - -L(exit_tail9): - add $9, %eax - RETURN - -L(exit_tail10): - add $10, %eax - RETURN - -L(exit_tail11): - add $11, %eax - RETURN - -L(exit_tail12): - add $12, %eax - RETURN - -L(exit_tail13): - add $13, %eax - RETURN - -L(exit_tail14): - add $14, %eax - RETURN - -L(exit_tail15): - add $15, %eax -# ifndef USE_AS_STRCAT - RETURN -END (STRLEN) -# endif -#endif |