author | H.J. Lu <hjl.tools@gmail.com> | 2016-03-07 05:47:26 -0800
---|---|---
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-03-07 09:32:48 -0800
commit | 5497a1a41de327211f6072f5695175ea98a5055d (patch) |
tree | 5e74439ccc26c531152a2260033239e4b13c4549 |
parent | c39c8cfb1bbb26371746e5b9456d1f3d2c28839d (diff) |
download | glibc-5497a1a41de327211f6072f5695175ea98a5055d.tar.gz |
Enable __memcpy_chk_sse2_unaligned (branch: hjl/pr19776/master)
Check Fast_Unaligned_Load for __memcpy_chk_sse2_unaligned. The new
selection order (see the C sketch after the ChangeLog entry) is:
1. __memcpy_chk_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_chk_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_chk_sse2 if SSSE3 isn't available.
4. __memcpy_chk_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_chk_ssse3
[BZ #19776]
* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk): Check
Fast_Unaligned_Load to enable __memcpy_chk_sse2_unaligned.
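
As an editorial aid, here is a minimal C sketch of the selection logic described above. It is illustrative only: the `cpu_features` struct and `select_memcpy_chk` function are hypothetical stand-ins for the HAS_ARCH_FEATURE/HAS_CPU_FEATURE checks in the assembly, and the AVX512 branch guarded by HAVE_AVX512_ASM_SUPPORT is omitted.

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the cpu-features bits tested via
   HAS_ARCH_FEATURE/HAS_CPU_FEATURE; the field names mirror the bits
   named in the commit message.  */
struct cpu_features
{
  bool avx_fast_unaligned_load;	/* AVX_Fast_Unaligned_Load */
  bool fast_unaligned_load;	/* Fast_Unaligned_Load */
  bool ssse3;			/* SSSE3 */
  bool fast_copy_backward;	/* Fast_Copy_Backward */
};

/* Return the name of the __memcpy_chk variant the resolver would pick,
   following the selection order in the commit message.  */
static const char *
select_memcpy_chk (const struct cpu_features *cpu)
{
  if (cpu->avx_fast_unaligned_load)
    return "__memcpy_chk_avx_unaligned";
  if (cpu->fast_unaligned_load)
    return "__memcpy_chk_sse2_unaligned";
  if (!cpu->ssse3)
    return "__memcpy_chk_sse2";
  if (cpu->fast_copy_backward)
    return "__memcpy_chk_ssse3_back";
  return "__memcpy_chk_ssse3";
}

int
main (void)
{
  /* A CPU with SSSE3 and Fast_Unaligned_Load but without
     AVX_Fast_Unaligned_Load now gets the sse2_unaligned variant;
     this is exactly the case the patch enables.  */
  struct cpu_features cpu = { false, true, true, false };
  puts (select_memcpy_chk (&cpu));	/* __memcpy_chk_sse2_unaligned */
  return 0;
}
```

Note that the patch also inverts the jump sense (jz to jnz) so each candidate can be loaded into %RAX_LP before its feature test, falling through to the next candidate when the bit is clear.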
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy_chk.S | 23 |
1 file changed, 13 insertions(+), 10 deletions(-)
```diff
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 648217e971..c009211280 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -32,22 +32,25 @@ ENTRY(__memcpy_chk)
 	LOAD_RTLD_GLOBAL_RO_RDX
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz      1f
+	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz      1f
-	leaq    __memcpy_chk_avx512_no_vzeroupper(%rip), %rax
+	jz	1f
+	lea	__memcpy_chk_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_chk_sse2(%rip), %rax
+1:	lea	__memcpy_chk_avx_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_chk_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_chk_sse2(%rip), %RAX_LP
 	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leaq	__memcpy_chk_ssse3(%rip), %rax
+	lea	__memcpy_chk_ssse3_back(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jz	2f
-	leaq	__memcpy_chk_ssse3_back(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	leaq	__memcpy_chk_avx_unaligned(%rip), %rax
+	jnz	2f
+	lea	__memcpy_chk_ssse3(%rip), %RAX_LP
 2:	ret
 END(__memcpy_chk)
 # else
```