path: root/sysdeps/x86_64/multiarch/memcpy.S
author     H.J. Lu <hjl.tools@gmail.com>    2016-03-04 08:37:40 -0800
committer  H.J. Lu <hjl.tools@gmail.com>    2016-03-04 08:39:07 -0800
commit     14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8 (patch)
tree       86611a9511bcc3cafb5de83890af6c0508e569a9 /sysdeps/x86_64/multiarch/memcpy.S
parent     4b230f6a60f3bb9cae92306d016535f40578ff2e (diff)
x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back.  The existing selection
order is updated to the following selection order:

1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3

	[BZ #18880]
	* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
	instead of Slow_BSF, and also check for Fast_Copy_Backward to
	enable __memcpy_ssse3_back.
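For readability, a minimal C sketch of the selection order 1-5 above follows.
The select_memcpy helper and its boolean parameters are hypothetical stand-ins
for the HAS_ARCH_FEATURE/HAS_CPU_FEATURE checks the assembly resolver performs;
the __memcpy_* names match the variants listed above, and the AVX-512 branch
guarded by Prefer_No_VZEROUPPER is omitted for brevity.

    #include <stddef.h>

    /* The five memcpy variants named in the commit message; in glibc these
       are assembly implementations picked by the IFUNC resolver.  */
    extern void *__memcpy_avx_unaligned (void *, const void *, size_t);
    extern void *__memcpy_sse2_unaligned (void *, const void *, size_t);
    extern void *__memcpy_sse2 (void *, const void *, size_t);
    extern void *__memcpy_ssse3_back (void *, const void *, size_t);
    extern void *__memcpy_ssse3 (void *, const void *, size_t);

    typedef void *(*memcpy_fn) (void *, const void *, size_t);

    /* Walk the selection order 1-5 from the commit message.  The boolean
       parameters stand in for the feature-bit checks done in assembly.  */
    static memcpy_fn
    select_memcpy (int avx_fast_unaligned_load, int fast_unaligned_load,
		   int has_ssse3, int fast_copy_backward)
    {
      if (avx_fast_unaligned_load)
	return __memcpy_avx_unaligned;	/* 1.  */
      if (fast_unaligned_load)
	return __memcpy_sse2_unaligned;	/* 2.  */
      if (!has_ssse3)
	return __memcpy_sse2;		/* 3.  */
      if (fast_copy_backward)
	return __memcpy_ssse3_back;	/* 4.  */
      return __memcpy_ssse3;		/* 5.  */
    }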
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcpy.S')
-rw-r--r--  sysdeps/x86_64/multiarch/memcpy.S  27
1 file changed, 14 insertions, 13 deletions
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 64a1bcd137..8882590e51 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -35,22 +35,23 @@ ENTRY(__new_memcpy)
jz 1f
HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
jz 1f
- leaq __memcpy_avx512_no_vzeroupper(%rip), %rax
+ lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
ret
#endif
-1: leaq __memcpy_avx_unaligned(%rip), %rax
+1: lea __memcpy_avx_unaligned(%rip), %RAX_LP
HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz 2f
- ret
-2: leaq __memcpy_sse2(%rip), %rax
- HAS_ARCH_FEATURE (Slow_BSF)
- jnz 3f
- leaq __memcpy_sse2_unaligned(%rip), %rax
- ret
-3: HAS_CPU_FEATURE (SSSE3)
- jz 4f
- leaq __memcpy_ssse3(%rip), %rax
-4: ret
+ jnz 2f
+ lea __memcpy_sse2_unaligned(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+ jnz 2f
+ lea __memcpy_sse2(%rip), %RAX_LP
+ HAS_CPU_FEATURE (SSSE3)
+ jz 2f
+ lea __memcpy_ssse3_back(%rip), %RAX_LP
+ HAS_ARCH_FEATURE (Fast_Copy_Backward)
+ jnz 2f
+ lea __memcpy_ssse3(%rip), %RAX_LP
+2: ret
END(__new_memcpy)
# undef ENTRY