summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/multiarch/strcmp-sse42.S
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2017-06-21 12:10:50 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2017-06-21 12:11:06 -0700
commit 11ffcacb64a939c10cfc713746b8ec88837f5c4a (patch)
tree 628e976f8ba6f657300dd0ed4705ee5e8f67f0fa /sysdeps/x86_64/multiarch/strcmp-sse42.S
parent 9649350d2ee47fae00794d57e2526aa5d67d900e (diff)
download glibc-11ffcacb64a939c10cfc713746b8ec88837f5c4a.tar.gz
x86-64: Implement strcmp family IFUNC selectors in C
Implement strcmp family IFUNC selectors in C. All internal calls within libc.so can use IFUNC on x86-64 since unlike x86, x86-64 supports PC-relative addressing to access the GOT entry so that it can call via PLT without using an extra register. For libc.a, we can't use IFUNC for functions which are called before IFUNC has been initialized. Using IFUNC internally reduces the icache footprint since libc.so and other code in the process use the same implementations. This patch uses IFUNC for strcmp family functions within libc. * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add strcmp-sse2, strcmp-sse4_2, strncmp-sse2, strncmp-sse4_2, strcasecmp_l-sse2, strcasecmp_l-sse4_2, strcasecmp_l-avx, strncase_l-sse2, strncase_l-sse4_2 and strncase_l-avx. * sysdeps/x86_64/multiarch/ifunc-strcasecmp.h: New file. * sysdeps/x86_64/multiarch/strcasecmp.c: Likewise. * sysdeps/x86_64/multiarch/strcasecmp_l-avx.S: Likewise. * sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S: Likewise. * sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S: Likewise. * sysdeps/x86_64/multiarch/strcasecmp_l.c: Likewise. * sysdeps/x86_64/multiarch/strcmp-sse2.S: Likewise. * sysdeps/x86_64/multiarch/strcmp-sse4_2.S: Likewise. * sysdeps/x86_64/multiarch/strcmp.c: Likewise. * sysdeps/x86_64/multiarch/strncase.c: Likewise. * sysdeps/x86_64/multiarch/strncase_l-avx.S: Likewise. * sysdeps/x86_64/multiarch/strncase_l-sse2.S: Likewise. * sysdeps/x86_64/multiarch/strncase_l-sse4_2.S: Likewise. * sysdeps/x86_64/multiarch/strncase_l.c: Likewise. * sysdeps/x86_64/multiarch/strncmp-sse2.S: Likewise. * sysdeps/x86_64/multiarch/strncmp-sse4_2.S: Likewise. * sysdeps/x86_64/multiarch/strncmp.c: Likewise. * sysdeps/x86_64/multiarch/strcasecmp_l.S: Removed. * sysdeps/x86_64/multiarch/strcmp.S: Likewise. * sysdeps/x86_64/multiarch/strncase_l.S: Likewise. * sysdeps/x86_64/multiarch/strncmp.S: Likewise. * sysdeps/x86_64/multiarch/strcmp-sse42.S: Include <sysdep.h>. (STRCMP_SSE42): New. Defined to __strcmp_sse42 if not defined. 
[USE_AS_STRCASECMP_L || USE_AS_STRNCASECMP_L]: Include "locale-defines.h". (UPDATE_STRNCMP_COUNTER): New. (SECTION): Likewise. (GLABEL): Likewise. (LABEL): Likewise. * sysdeps/x86_64/multiarch/strncmp-ssse3.S: Rewrite and enable for libc.a.
Diffstat (limited to 'sysdeps/x86_64/multiarch/strcmp-sse42.S')
-rw-r--r-- sysdeps/x86_64/multiarch/strcmp-sse42.S 34
1 files changed, 34 insertions, 0 deletions
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index ed26d4a8fb..4aeb14e175 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -17,6 +17,40 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <sysdep.h>
+
+#ifndef STRCMP_SSE42 /* an earlier definition of STRCMP_SSE42 takes precedence */
+# define STRCMP_SSE42 __strcmp_sse42
+#endif
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+#endif
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
+ if the new counter (%r11 + %rcx - 16) > the old one or is 0. */
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate the number left to compare: %r9 = %rcx + %r11 - 16 */ \
+ lea -16(%rcx, %r11), %r9; \
+ cmp %r9, %r11; \
+ jb LABEL(strcmp_exitz); \
+ test %r9, %r9; \
+ je LABEL(strcmp_exitz); \
+ mov %r9, %r11
+#else
+# define UPDATE_STRNCMP_COUNTER /* expands to nothing when neither macro above is defined */
+#endif
+
+#ifdef USE_AVX /* selects the SECTION name and the suffix that GLABEL appends */
+# define SECTION avx
+# define GLABEL(l) l##_avx
+#else
+# define SECTION sse4.2
+# define GLABEL(l) l##_sse42
+#endif
+
+#define LABEL(l) .L##l /* expands to l with a .L prefix */
/* We use 0x1a:
_SIDD_SBYTE_OPS