diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2010-02-24 18:11:35 -0800 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2010-02-24 18:11:35 -0800 |
commit | cc50f1a4b458f769ceb72d88bb78c8429361fec1 (patch) | |
tree | f2ea40d2ac747a5cf784e6ff58582c06e1c999a8 /sysdeps/i386/i686/multiarch/memset-sse2-rep.S | |
parent | 7ca890b88e6ab7624afb1742a9fffb37ad5b3fc3 (diff) | |
download | glibc-cc50f1a4b458f769ceb72d88bb78c8429361fec1.tar.gz |
Fix issues in x86 memset-sse2.S/memset-sse2-rep.S
Diffstat (limited to 'sysdeps/i386/i686/multiarch/memset-sse2-rep.S')
-rw-r--r-- | sysdeps/i386/i686/multiarch/memset-sse2-rep.S | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
index 84afffeb66..f9a0b13d0c 100644
--- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -243,7 +243,6 @@ L(32bytesormore):
 	pxor	%xmm0, %xmm0
 #else
 	movd	%eax, %xmm0
-	punpcklbw %xmm0, %xmm0
 	pshufd	$0, %xmm0, %xmm0
 #endif
 	testl	$0xf, %edx
@@ -261,7 +260,7 @@ L(not_aligned_16):
 	ALIGN (4)
 L(aligned_16):
 	cmp	$128, %ecx
-	jge	L(128bytesormore)
+	jae	L(128bytesormore)
 L(aligned_16_less128bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
@@ -293,7 +292,7 @@ L(128bytesormore):
 	 * fast string will prefetch and combine data efficiently.
 	 */
 	cmp	%edi, %ecx
-	jae	L(128bytesormore_nt)
+	jae	L(128bytesormore_endof_L1)
 	subl	$128, %ecx
 L(128bytesormore_normal):
 	sub	$128, %ecx
@@ -306,7 +305,7 @@ L(128bytesormore_normal):
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jl	L(128bytesless_normal)
+	jb	L(128bytesless_normal)

 	sub	$128, %ecx
@@ -319,15 +318,16 @@ L(128bytesormore_normal):
 	movdqa	%xmm0, 0x60(%edx)
 	movdqa	%xmm0, 0x70(%edx)
 	lea	128(%edx), %edx
-	jge	L(128bytesormore_normal)
+	jae	L(128bytesormore_normal)

 L(128bytesless_normal):
 	POP (%edi)
-	lea	128(%ecx), %ecx
+	add	$128, %ecx
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

+	CFI_PUSH (%edi)
 	ALIGN (4)
-L(128bytesormore_nt):
+L(128bytesormore_endof_L1):
 	mov	%edx, %edi
 	mov	%ecx, %edx
 	shr	$2, %ecx