diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2016-03-04 08:37:40 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2016-03-04 08:39:07 -0800 |
commit | 14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8 (patch) | |
tree | 86611a9511bcc3cafb5de83890af6c0508e569a9 /sysdeps/x86_64/multiarch/memcpy.S | |
parent | 4b230f6a60f3bb9cae92306d016535f40578ff2e (diff) | |
download | glibc-14a1d7cc4c4fd5ee8e4e66b777221dd32a84efe8.tar.gz |
x86-64: Fix memcpy IFUNC selection
Check Fast_Unaligned_Load, instead of Slow_BSF, and also check for
Fast_Copy_Backward to enable __memcpy_ssse3_back. The existing selection
order is replaced with the following selection order:
1. __memcpy_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_sse2 if SSSE3 isn't available.
4. __memcpy_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_ssse3
[BZ #18880]
* sysdeps/x86_64/multiarch/memcpy.S: Check Fast_Unaligned_Load,
instead of Slow_BSF, and also check for Fast_Copy_Backward to
enable __memcpy_ssse3_back.
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcpy.S')
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy.S | 27 |
1 files changed, 14 insertions, 13 deletions
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S index 64a1bcd137..8882590e51 100644 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ b/sysdeps/x86_64/multiarch/memcpy.S @@ -35,22 +35,23 @@ ENTRY(__new_memcpy) jz 1f HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) jz 1f - leaq __memcpy_avx512_no_vzeroupper(%rip), %rax + lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP ret #endif -1: leaq __memcpy_avx_unaligned(%rip), %rax +1: lea __memcpy_avx_unaligned(%rip), %RAX_LP HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz 2f - ret -2: leaq __memcpy_sse2(%rip), %rax - HAS_ARCH_FEATURE (Slow_BSF) - jnz 3f - leaq __memcpy_sse2_unaligned(%rip), %rax - ret -3: HAS_CPU_FEATURE (SSSE3) - jz 4f - leaq __memcpy_ssse3(%rip), %rax -4: ret + jnz 2f + lea __memcpy_sse2_unaligned(%rip), %RAX_LP + HAS_ARCH_FEATURE (Fast_Unaligned_Load) + jnz 2f + lea __memcpy_sse2(%rip), %RAX_LP + HAS_CPU_FEATURE (SSSE3) + jz 2f + lea __memcpy_ssse3_back(%rip), %RAX_LP + HAS_ARCH_FEATURE (Fast_Copy_Backward) + jnz 2f + lea __memcpy_ssse3(%rip), %RAX_LP +2: ret END(__new_memcpy) # undef ENTRY |