summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--ChangeLog107
-rw-r--r--Makefile4
-rw-r--r--NEWS24
-rw-r--r--elf/Makefile21
-rw-r--r--elf/dl-lookup.c2
-rw-r--r--elf/tst-audit6.c28
-rw-r--r--elf/tst-audit7.c1
-rw-r--r--elf/tst-auditmod6a.c46
-rw-r--r--elf/tst-auditmod6b.c220
-rw-r--r--elf/tst-auditmod6c.c225
-rw-r--r--elf/tst-auditmod7a.c1
-rw-r--r--elf/tst-auditmod7b.c218
-rw-r--r--nptl/ChangeLog30
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S115
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S22
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S22
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S2
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S50
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S210
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S58
-rw-r--r--nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h55
-rw-r--r--nptl/sysdeps/x86_64/tcb-offsets.sym10
-rwxr-xr-xsysdeps/i386/configure550
-rw-r--r--sysdeps/i386/configure.in8
-rw-r--r--sysdeps/i386/i686/multiarch/Makefile11
-rw-r--r--sysdeps/i386/i686/multiarch/strcasestr-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strcasestr.c1
-rw-r--r--sysdeps/i386/i686/multiarch/strcspn-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strcspn.S114
-rw-r--r--sysdeps/i386/i686/multiarch/strlen.S154
-rw-r--r--sysdeps/i386/i686/multiarch/strpbrk-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strpbrk.S3
-rw-r--r--sysdeps/i386/i686/multiarch/strspn-c.c2
-rw-r--r--sysdeps/i386/i686/multiarch/strspn.S95
-rw-r--r--sysdeps/i386/i686/multiarch/strstr-c.c12
-rw-r--r--sysdeps/i386/i686/multiarch/strstr.c1
-rw-r--r--sysdeps/x86_64/cacheinfo.c42
-rw-r--r--sysdeps/x86_64/dl-trampoline.S244
-rw-r--r--sysdeps/x86_64/dl-trampoline.h269
-rw-r--r--sysdeps/x86_64/multiarch/Makefile2
-rw-r--r--sysdeps/x86_64/multiarch/rawmemchr.S1
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-ssse3.S3
-rw-r--r--sysdeps/x86_64/multiarch/strcmp.S12
-rw-r--r--sysdeps/x86_64/multiarch/strcspn-c.c6
-rw-r--r--sysdeps/x86_64/multiarch/strlen.S1
-rw-r--r--sysdeps/x86_64/multiarch/strncmp-ssse3.S4
-rw-r--r--sysdeps/x86_64/multiarch/strspn-c.c4
-rw-r--r--sysdeps/x86_64/strcmp.S211
49 files changed, 2735 insertions, 493 deletions
diff --git a/.gitignore b/.gitignore
index d9294bec05..a64fda5108 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ stamp.*
*.tgz
*.bz2
=*
+TAGS
TODO
AUTHORS
copyr-*
diff --git a/ChangeLog b/ChangeLog
index 037532075d..81e903cf68 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,110 @@
+2009-08-01 H.J. Lu <hongjiu.lu@intel.com>
+
+ * elf/Makefile (distribute): Add tst-audit6.c tst-auditmod6a.c
+ tst-auditmod6b.c tst-auditmod6c.c tst-audit7.c tst-auditmod7a.c
+ tst-auditmod7b.c.
+ (tests): Add tst-audit6 tst-audit7.
+ (modules-names): Add st-auditmod6a tst-auditmod6b tst-auditmod6c
+ tst-auditmod7a tst-auditmod7b.
+ ($(objpfx)tst-audit6): New.
+ ($(objpfx)tst-audit6.out): Likewise.
+ ($(objpfx)tst-audit7): Likewise.
+ ($(objpfx)tst-audit7.out): Likewise.
+ (tst-audit6-ENV): Likewise.
+ (tst-audit7-ENV): Likewise.
+ (CFLAGS-tst-auditmod6b.c): Likewise.
+ (CFLAGS-tst-auditmod6c.c): Likewise.
+ (CFLAGS-tst-auditmod7b.c): Likewise.
+ * elf/tst-audit6.c: New file.
+ * elf/tst-audit7.c: New file.
+ * elf/tst-auditmod6a.c: New file.
+ * elf/tst-auditmod6b.c: New file.
+ * elf/tst-auditmod6c.c: New file.
+ * elf/tst-auditmod7a.c: New file.
+ * elf/tst-auditmod7b.c: New file.
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Move
+ saving and restoring SSE/AVX registers to ...
+ * sysdeps/x86_64/dl-trampoline.h: This. New file.
+
+2009-08-07 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/strcspn.S (STRCSPN): Use PIC
+ only if SHARED is defined.
+ * sysdeps/i386/i686/multiarch/strspn.S (strspn): Likewise.
+
+2009-08-03 Jim Meyering <meyering@redhat.com>
+
+ * sysdeps/i386/configure.in: Use AC_HEADER_CHECK.
+
+2009-08-08 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/multiarch/strlen.S: Move SSE4.2 version into the same
+ section as the other functions for this architecture.
+ * sysdeps/x86_64/multiarch/rawmemchr.S: Likewise.
+
+2009-08-07 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/strcmp.S: Add support to compile with
+ USE_SSSE3. In this case palignr is used.
+ * sysdeps/x86_64/multiarch/strcmp.S (strcmp): If SSE4.3 is not
+ available but SSSE3 is, pick __str{,n}cmp_ssse3.
+ * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines):
+ Add strcmp-ssse3 and strncmp-ssse3.
+ * sysdeps/x86_64/multiarch/strcmp-ssse3.S: New file.
+ * sysdeps/x86_64/multiarch/strncmp-ssse3.S: New file.
+
+ * sysdeps/x86_64/multiarch/strcspn-c.c (STRCSPN_SSE42): Avoid
+ warning through fake initialization.
+
+2009-08-07 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/strlen.S (ENTRY): Add the missing "; \".
+
+2009-08-07 Andreas Schwab <schwab@redhat.com>
+
+ * elf/dl-lookup.c (do_lookup_x): Enter correct name into table of
+ unique symbols.
+
+2009-08-05 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Properly use
+ EBX from EAX = 1. Handle EAX = 11.
+
+2009-08-07 Andreas Schwab <schwab@redhat.com>
+
+ * Makefile (TAGS): Use separate sed -e expressions to avoid \
+ inside ''.
+
+2009-08-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/strcspn.S: Add comments for no
+ hidden IFUNC functions.
+ * sysdeps/i386/i686/multiarch/strspn.S: Likewise.
+
+ * sysdeps/i386/i686/multiarch/strlen.S: New file.
+
+ * sysdeps/i386/i686/multiarch/Makefile [subdir=string]
+ (sysdep_routines): Add strcspn-c, strpbrk-c, strspn-c, strstr-c, and
+ strcasestr-c.
+ (CFLAGS-strcspn-c.c): Define.
+ (CFLAGS-strpbrk-c.c): Define.
+ (CFLAGS-strspn-c.c): Define.
+ (CFLAGS-strstr.c): Define.
+ (CFLAGS-strcasestr.c): Define.
+ * sysdeps/i386/i686/multiarch/strcspn-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strcspn.S: New file.
+ * sysdeps/i386/i686/multiarch/strpbrk-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strpbrk.S: New file.
+ * sysdeps/i386/i686/multiarch/strspn-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strspn.S: New file.
+ * sysdeps/i386/i686/multiarch/strstr-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strstr.c: New file.
+ * sysdeps/i386/i686/multiarch/strcasestr-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strcasestr.c: New file.
+ * sysdeps/x86_64/multiarch/strcspn-c.c (STRCSPN_SSE42): Use
+ -16L instead of 0xfffffffffffffff0L.
+ * sysdeps/x86_64/multiarch/strspn-c.c (__strspn_sse42): Likewise.
+
2009-08-02 Ulrich Drepper <drepper@redhat.com>
* sysdeps/i386/configure.in: Add test for <cpuid.h>.
diff --git a/Makefile b/Makefile
index cab5ff3a17..e346979c03 100644
--- a/Makefile
+++ b/Makefile
@@ -341,9 +341,9 @@ endif
.PHONY: TAGS
TAGS:
- scripts/list-sources.sh | sed -n '/Makefile/p;\
+ scripts/list-sources.sh | sed -n -e '/Makefile/p' \
$(foreach S,[chsSyl] cxx sh bash pl,\
- $(subst .,\.,/.$S\(.in\)*$$/p;))' \
+ $(subst .,\.,-e '/.$S\(.in\)*$$/p')) \
| $(ETAGS) -o $@ -
# Make the distribution tarfile.
diff --git a/NEWS b/NEWS
index 6061d42eb2..ca52e93b73 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes. 2009-7-21
+GNU C Library NEWS -- history of user-visible changes. 2009-8-8
Copyright (C) 1992-2008, 2009 Free Software Foundation, Inc.
See the end for copying conditions.
@@ -17,12 +17,20 @@ Version 2.11
Implemented by H.J. Lu.
* New optimized string functions for x86-64: strstr, strcasestr, memcmp,
- strcspn, strpbrk, strspn, strcpy, stpcpy, strncpy, strcmp, strncmp.
+ strcspn, strpbrk, strspn, strcpy, stpcpy, strncpy, strcmp (SSE2, SSE4.2),
+ strncmp (SSE2, SSE4.2).
Contributed by H.J. Lu.
- strlen, rawmemchr.
+ strlen, rawmemchr, strcmp (SSSE3), strncmp (SSSE3).
Implemented by Ulrich Drepper.
+* New optimized string functions for x86: strlen, strcspn, strspn, strpbrk,
+ strstr, strcasestr.
+ Contributed by H.J. Lu.
+
+* Support for fma instruction in AVX on x86-64.
+ Implemented by H.J. Lu and Ulrich Drepper.
+
* AVX support in x86-64 auditing support in ld.so.
Implemented by H.J. Lu.
@@ -37,6 +45,16 @@ Version 2.11
necessity is every process again.
Implemented by Ulrich Drepper.
+* New resolver flag RES_USE_DNSSEC to enable use of verified lookup.
+ Implemented by Adam Tkac.
+
+* Optimized iconv conversions for S390x.
+ Implemented by Andreas Krebbel.
+
+* Using condvars with PI mutexes is now more efficient due to kernel
+ support for requeueing to PI futexes. NPTL support added for x86-64.
+ Implemented by Ulrich Drepper.
+
Version 2.10
diff --git a/elf/Makefile b/elf/Makefile
index 3baad9621d..d57c7fe7ed 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -93,6 +93,9 @@ distribute := rtld-Rules \
tst-auditmod1.c tst-auditmod3a.c tst-auditmod3b.c \
tst-auditmod4a.c tst-auditmod4b.c \
tst-audit5.c tst-auditmod5a.c tst-auditmod5b.c \
+ tst-audit6.c tst-auditmod6a.c tst-auditmod6b.c \
+ tst-auditmod6c.c \
+ tst-audit7.c tst-auditmod7a.c tst-auditmod7b.c \
order2mod1.c order2mod2.c order2mod3.c order2mod4.c \
tst-stackguard1.c tst-stackguard1-static.c \
tst-array5.c tst-array5-static.c tst-array5dep.c \
@@ -200,7 +203,7 @@ tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \
test-srcs = tst-pathopt
tests-execstack-yes = tst-execstack tst-execstack-needed tst-execstack-prog
ifeq (x86_64,$(config-machine))
-tests += tst-audit3 tst-audit4 tst-audit5
+tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7
endif
endif
ifeq (yesyes,$(have-fpie)$(build-shared))
@@ -255,7 +258,9 @@ endif
ifeq (x86_64,$(config-machine))
modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod4a tst-auditmod4b \
- tst-auditmod5a tst-auditmod5b
+ tst-auditmod5a tst-auditmod5b \
+ tst-auditmod6a tst-auditmod6b tst-auditmod6c \
+ tst-auditmod7a tst-auditmod7b
endif
modules-execstack-yes = tst-execstack-mod
extra-test-objs += $(addsuffix .os,$(strip $(modules-names)))
@@ -987,6 +992,15 @@ $(objpfx)tst-audit5: $(objpfx)tst-auditmod5a.so
$(objpfx)tst-audit5.out: $(objpfx)tst-auditmod5b.so
tst-audit5-ENV = LD_AUDIT=$(objpfx)tst-auditmod5b.so
+$(objpfx)tst-audit6: $(objpfx)tst-auditmod6a.so
+$(objpfx)tst-audit6.out: $(objpfx)tst-auditmod6b.so \
+ $(objpfx)tst-auditmod6c.so
+tst-audit6-ENV = LD_AUDIT=$(objpfx)tst-auditmod6b.so:$(objpfx)tst-auditmod6c.so
+
+$(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so
+$(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so
+tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so
+
$(objpfx)tst-global1: $(libdl)
$(objpfx)tst-global1.out: $(objpfx)testobj6.so $(objpfx)testobj2.so
@@ -1134,4 +1148,7 @@ ifeq (yes,$(config-cflags-avx))
CFLAGS-tst-audit4.c += -mavx
CFLAGS-tst-auditmod4a.c += -mavx
CFLAGS-tst-auditmod4b.c += -mavx
+CFLAGS-tst-auditmod6b.c += -mavx
+CFLAGS-tst-auditmod6c.c += -mavx
+CFLAGS-tst-auditmod7b.c += -mavx
endif
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 56724c9b4d..c1a1366d6f 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -321,7 +321,7 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
if (table[idx].name == NULL)
{
table[idx].hashval = hash;
- table[idx].name = strtab + sym->st_name;
+ table[idx].name = name;
if ((type_class & ELF_RTYPE_CLASS_COPY) != 0)
{
table[idx].sym = ref;
diff --git a/elf/tst-audit6.c b/elf/tst-audit6.c
new file mode 100644
index 0000000000..1f6dcb16e9
--- /dev/null
+++ b/elf/tst-audit6.c
@@ -0,0 +1,28 @@
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <cpuid.h>
+#include <emmintrin.h>
+
+extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
+ __m128i, __m128i, __m128i, __m128i);
+
+int
+main (void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ /* Run AVX test only if AVX is supported. */
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ {
+ __m128i xmm = _mm_setzero_si128 ();
+ __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
+
+ xmm = _mm_set1_epi32 (0x98abcdef);
+ if (memcmp (&xmm, &ret, sizeof (ret)))
+ abort ();
+ }
+ return 0;
+}
diff --git a/elf/tst-audit7.c b/elf/tst-audit7.c
new file mode 100644
index 0000000000..1d2a7de439
--- /dev/null
+++ b/elf/tst-audit7.c
@@ -0,0 +1 @@
+#include "tst-audit6.c"
diff --git a/elf/tst-auditmod6a.c b/elf/tst-auditmod6a.c
new file mode 100644
index 0000000000..c3a850ce98
--- /dev/null
+++ b/elf/tst-auditmod6a.c
@@ -0,0 +1,46 @@
+/* Test case for x86-64 preserved registers in dynamic linker. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <emmintrin.h>
+
+__m128i
+audit_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3,
+ __m128i x4, __m128i x5, __m128i x6, __m128i x7)
+{
+ __m128i xmm;
+
+ xmm = _mm_set1_epi32 (0x100);
+ if (memcmp (&xmm, &x0, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x101);
+ if (memcmp (&xmm, &x1, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x102);
+ if (memcmp (&xmm, &x2, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x103);
+ if (memcmp (&xmm, &x3, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x104);
+ if (memcmp (&xmm, &x4, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x105);
+ if (memcmp (&xmm, &x5, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x106);
+ if (memcmp (&xmm, &x6, sizeof (xmm)))
+ abort ();
+
+ xmm = _mm_set1_epi32 (0x107);
+ if (memcmp (&xmm, &x7, sizeof (xmm)))
+ abort ();
+
+ return _mm_setzero_si128 ();
+}
diff --git a/elf/tst-auditmod6b.c b/elf/tst-auditmod6b.c
new file mode 100644
index 0000000000..f756b50227
--- /dev/null
+++ b/elf/tst-auditmod6b.c
@@ -0,0 +1,220 @@
+/* Verify that changing AVX registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+#ifdef __AVX__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int avx = -1;
+
+static int
+__attribute ((always_inline))
+check_avx (void)
+{
+ if (avx == -1)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ avx = 1;
+ else
+ avx = 0;
+ }
+ return avx;
+}
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ for (i = 0; i < 8; i++)
+ if (memcmp (&regs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&regs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ for (i = 0; i < 8; i += 2)
+ {
+ regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 1);
+ regs->lr_vector[i].xmm[0] = regs->lr_xmm[i];
+ regs->lr_vector[i + 1].ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (i + 2);
+ regs->lr_xmm[i + 1] = regs->lr_vector[i + 1].xmm[0];
+ }
+
+ __m256i ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" );
+ asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" );
+ asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" );
+ asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" );
+ asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" );
+ asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ if (memcmp (&outregs->lrv_xmm0, &xmm, sizeof (xmm))
+ || memcmp (&outregs->lrv_vector0, &xmm, sizeof (xmm)))
+ abort ();
+
+ __m256i ymm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 0x100);
+ if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (i + 0x101);
+ if (memcmp (&inregs->lr_xmm[i + 1],
+ &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ }
+
+ outregs->lrv_vector0.ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (0x12349876);
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
diff --git a/elf/tst-auditmod6c.c b/elf/tst-auditmod6c.c
new file mode 100644
index 0000000000..49cbf05492
--- /dev/null
+++ b/elf/tst-auditmod6c.c
@@ -0,0 +1,225 @@
+/* Verify that changing AVX registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+#ifdef __AVX__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int avx = -1;
+
+static int
+__attribute ((always_inline))
+check_avx (void)
+{
+ if (avx == -1)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ avx = 1;
+ else
+ avx = 0;
+ }
+ return avx;
+}
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+ __m128i xmm;
+ __m256i ymm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 1);
+ if (memcmp (&regs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&regs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+ regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 0x100);
+ regs->lr_vector[i].xmm[0] = regs->lr_xmm[i];
+
+ ymm = _mm256_set1_epi32 (i + 2);
+ if (memcmp (&regs->lr_xmm[i + 1],
+ &regs->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (&regs->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ regs->lr_vector[i + 1].ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (i + 0x101);
+ regs->lr_xmm[i + 1] = regs->lr_vector[i + 1].xmm[0];
+ }
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" );
+ asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" );
+ asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" );
+ asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" );
+ asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" );
+ asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m256i ymm = _mm256_set1_epi32 (0x12349876);;
+ if (memcmp (&outregs->lrv_vector0, &ymm, sizeof (ymm)))
+ abort ();
+
+ __m128i xmm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 0x100);
+ if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (i + 0x101);
+ if (memcmp (&inregs->lr_xmm[i + 1],
+ &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ }
+
+ outregs->lrv_vector0.ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (0x98abcdef);
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
diff --git a/elf/tst-auditmod7a.c b/elf/tst-auditmod7a.c
new file mode 100644
index 0000000000..b379df75d6
--- /dev/null
+++ b/elf/tst-auditmod7a.c
@@ -0,0 +1 @@
+#include "tst-auditmod6a.c"
diff --git a/elf/tst-auditmod7b.c b/elf/tst-auditmod7b.c
new file mode 100644
index 0000000000..eb237586fe
--- /dev/null
+++ b/elf/tst-auditmod7b.c
@@ -0,0 +1,218 @@
+/* Verify that changing AVX registers in audit library won't affect
+ function parameter passing/return. */
+
+#include <dlfcn.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <gnu/lib-names.h>
+
+unsigned int
+la_version (unsigned int v)
+{
+ setlinebuf (stdout);
+
+ printf ("version: %u\n", v);
+
+ char buf[20];
+ sprintf (buf, "%u", v);
+
+ return v;
+}
+
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ if (flag == LA_ACT_CONSISTENT)
+ printf ("activity: consistent\n");
+ else if (flag == LA_ACT_ADD)
+ printf ("activity: add\n");
+ else if (flag == LA_ACT_DELETE)
+ printf ("activity: delete\n");
+ else
+ printf ("activity: unknown activity %u\n", flag);
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ char buf[100];
+ const char *flagstr;
+ if (flag == LA_SER_ORIG)
+ flagstr = "LA_SET_ORIG";
+ else if (flag == LA_SER_LIBPATH)
+ flagstr = "LA_SER_LIBPATH";
+ else if (flag == LA_SER_RUNPATH)
+ flagstr = "LA_SER_RUNPATH";
+ else if (flag == LA_SER_CONFIG)
+ flagstr = "LA_SER_CONFIG";
+ else if (flag == LA_SER_DEFAULT)
+ flagstr = "LA_SER_DEFAULT";
+ else if (flag == LA_SER_SECURE)
+ flagstr = "LA_SER_SECURE";
+ else
+ {
+ sprintf (buf, "unknown flag %d", flag);
+ flagstr = buf;
+ }
+ printf ("objsearch: %s, %s\n", name, flagstr);
+
+ return (char *) name;
+}
+
+unsigned int
+la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf ("objopen: %ld, %s\n", lmid, l->l_name);
+
+ return 3;
+}
+
+void
+la_preinit (uintptr_t *cookie)
+{
+ printf ("preinit\n");
+}
+
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf ("objclose\n");
+ return 0;
+}
+
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ return sym->st_value;
+}
+
+#define pltenter la_x86_64_gnu_pltenter
+#define pltexit la_x86_64_gnu_pltexit
+#define La_regs La_x86_64_regs
+#define La_retval La_x86_64_retval
+#define int_retval lrv_rax
+
+#include <tst-audit.h>
+
+#ifdef __AVX__
+#include <immintrin.h>
+#include <cpuid.h>
+
+static int avx = -1;
+
+static int
+__attribute ((always_inline))
+check_avx (void)
+{
+ if (avx == -1)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (ecx & bit_AVX))
+ avx = 1;
+ else
+ avx = 0;
+ }
+ return avx;
+}
+#else
+#include <emmintrin.h>
+#endif
+
+ElfW(Addr)
+pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ for (i = 0; i < 8; i++)
+ if (memcmp (&regs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&regs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ for (i = 0; i < 8; i += 2)
+ {
+ regs->lr_xmm[i] = (La_x86_64_xmm) _mm_set1_epi32 (i + 0x100);
+ regs->lr_vector[i + 1].ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (i + 0x101);
+ }
+
+ __m256i ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ asm volatile ("vmovdqa %0, %%ymm2" : : "x" (ymm) : "xmm2" );
+ asm volatile ("vmovdqa %0, %%ymm3" : : "x" (ymm) : "xmm3" );
+ asm volatile ("vmovdqa %0, %%ymm4" : : "x" (ymm) : "xmm4" );
+ asm volatile ("vmovdqa %0, %%ymm5" : : "x" (ymm) : "xmm5" );
+ asm volatile ("vmovdqa %0, %%ymm6" : : "x" (ymm) : "xmm6" );
+ asm volatile ("vmovdqa %0, %%ymm7" : : "x" (ymm) : "xmm7" );
+
+ *framesizep = 1024;
+ }
+#endif
+
+ return sym->st_value;
+}
+
+unsigned int
+pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
+ const char *symname)
+{
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
+ symname, (long int) sym->st_value, ndx, outregs->int_retval);
+
+#ifdef __AVX__
+ if (check_avx () && strcmp (symname, "audit_test") == 0)
+ {
+ int i;
+
+ __m128i xmm = _mm_setzero_si128 ();
+ if (memcmp (&outregs->lrv_xmm0, &xmm, sizeof (xmm))
+ || memcmp (&outregs->lrv_vector0, &xmm, sizeof (xmm)))
+ abort ();
+
+ __m256i ymm;
+
+ for (i = 0; i < 8; i += 2)
+ {
+ xmm = _mm_set1_epi32 (i + 0x100);
+ if (memcmp (&inregs->lr_xmm[i], &xmm, sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i], &xmm, sizeof (xmm)))
+ abort ();
+
+ ymm = _mm256_set1_epi32 (i + 0x101);
+ if (memcmp (&inregs->lr_xmm[i + 1],
+ &inregs->lr_vector[i + 1].xmm[0], sizeof (xmm))
+ || memcmp (&inregs->lr_vector[i + 1], &ymm, sizeof (ymm)))
+ abort ();
+ }
+
+ outregs->lrv_vector0.ymm[0]
+ = (La_x86_64_ymm) _mm256_set1_epi32 (0x98abcdef);
+
+ ymm = _mm256_set1_epi32 (-1);
+ asm volatile ("vmovdqa %0, %%ymm0" : : "x" (ymm) : "xmm0" );
+ asm volatile ("vmovdqa %0, %%ymm1" : : "x" (ymm) : "xmm1" );
+ }
+#endif
+
+ return 0;
+}
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 83d0dda46c..fe3e90f310 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,33 @@
+2009-08-08 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait):
+ Optimize code path used when FUTEX_CLOCK_REALTIME is supported.
+
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+ (__pthread_cond_wait): Optimize by avoiding use of callee-safe
+ register.
+
+2009-08-07 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Little optimizations
+ enabled by the special *_asynccancel functions.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
+
+ * sysdeps/unix/sysv/linux/x86_64/cancellation.S: Include lowlevellock.h.
+
+2009-08-04 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/x86_64/cancellation.S: New file.
+ * sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S: New file.
+ * sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S: New file.
+ * sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h (PSEUDO): Optimize
+ since we can assume the special __*_{en,dis}able_asynccancel
+ functions.
+ (PUSHARGS_*, POPARGS_*, SAVESTK_*, RESTSTK_*): Removed.
+ * sysdeps/x86_64/tcb-offsets.sym: Add cancellation-related bits
+ and PTHREAD_CANCELED.
+
2009-07-31 Ulrich Drepper <drepper@redhat.com>
* descr.h: Better definition of *_BITMASK macros for cancellation.
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
new file mode 100644
index 0000000000..0d48ec6fcd
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
@@ -0,0 +1,115 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <tcb-offsets.h>
+#include <kernel-features.h>
+#include "lowlevellock.h"
+
+#ifdef IS_IN_libpthread
+# ifdef SHARED
+# define __pthread_unwind __GI___pthread_unwind
+# endif
+#else
+# ifndef SHARED
+ .weak __pthread_unwind
+# endif
+#endif
+
+
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+ movl $(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+#else
+# if FUTEX_WAIT == 0
+# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+ movl %fs:PRIVATE_FUTEX, reg
+# else
+# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+ movl %fs:PRIVATE_FUTEX, reg ; \
+ orl $FUTEX_WAIT, reg
+# endif
+#endif
+
+/* It is crucial that the functions in this file don't modify registers
+ other than %rax and %r11. The syscall wrapper code depends on this
+ because it doesn't explicitly save the other registers which hold
+ relevant values. */
+ .text
+
+ .hidden __pthread_enable_asynccancel
+ENTRY(__pthread_enable_asynccancel)
+ movl %fs:CANCELHANDLING, %eax
+2: movl %eax, %r11d
+ orl $TCB_CANCELTYPE_BITMASK, %r11d
+ cmpl %eax, %r11d
+ je 1f
+
+ lock
+ cmpxchgl %r11d, %fs:CANCELHANDLING
+ jnz 2b
+
+ andl $(TCB_CANCELSTATE_BITMASK|TCB_CANCELTYPE_BITMASK|TCB_CANCELED_BITMASK|TCB_EXITING_BITMASK|TCB_CANCEL_RESTMASK|TCB_TERMINATED_BITMASK), %r11d
+ cmpl $(TCB_CANCELTYPE_BITMASK|TCB_CANCELED_BITMASK), %r11d
+ je 3f
+
+1: ret
+
+3: movq $TCB_PTHREAD_CANCELED, %fs:RESULT
+ lock
+ orl $TCB_EXITING_BITMASK, %fs:CANCELHANDLING
+ movq %fs:CLEANUP_JMP_BUF, %rdi
+#ifdef SHARED
+ call __pthread_unwind@PLT
+#else
+ call __pthread_unwind
+#endif
+ hlt
+END(__pthread_enable_asynccancel)
+
+
+ .hidden __pthread_disable_asynccancel
+ENTRY(__pthread_disable_asynccancel)
+ testl $TCB_CANCELTYPE_BITMASK, %edi
+ jnz 1f
+
+ movl %fs:CANCELHANDLING, %eax
+2: movl %eax, %r11d
+ andl $~TCB_CANCELTYPE_BITMASK, %r11d
+ lock
+ cmpxchgl %r11d, %fs:CANCELHANDLING
+ jnz 2b
+
+3: movl %r11d, %eax
+ andl $(TCB_CANCELING_BITMASK|TCB_CANCELED_BITMASK), %eax
+ cmpl $TCB_CANCELING_BITMASK, %eax
+ je 4f
+1: ret
+
+ /* Performance doesn't matter in this loop. We will
+ delay until the thread is canceled. And we will unlikely
+ enter the loop twice. */
+4: movq %fs:0, %rdi
+ movl $__NR_futex, %eax
+ xorq %r10, %r10
+ addq $CANCELHANDLING, %rdi
+ LOAD_PRIVATE_FUTEX_WAIT (%esi)
+ syscall
+ jmp 3b
+END(__pthread_disable_asynccancel)
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
new file mode 100644
index 0000000000..1100588502
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
@@ -0,0 +1,22 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define __pthread_enable_asynccancel __libc_enable_asynccancel
+#define __pthread_disable_asynccancel __libc_disable_asynccancel
+#include "cancellation.S"
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S
new file mode 100644
index 0000000000..ce4192b5d3
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S
@@ -0,0 +1,22 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define __pthread_enable_asynccancel __librt_enable_asynccancel
+#define __pthread_disable_asynccancel __librt_disable_asynccancel
+#include "cancellation.S"
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 4913beb8af..86bdac1b1b 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -157,7 +157,6 @@ __pthread_cond_timedwait:
.LcleanupSTART1:
34: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
- movq 8(%rsp), %rdi
movq %r13, %r10
movl $FUTEX_WAIT_BITSET, %esi
@@ -511,7 +510,6 @@ __pthread_cond_timedwait:
.LcleanupSTART2:
4: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
- movq 8(%rsp), %rdi
leaq 32(%rsp), %r10
cmpq $-1, dep_mutex(%rdi)
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index a66523eab6..f5b929ea71 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -45,14 +45,8 @@ __pthread_cond_wait:
cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
#endif
- pushq %r12
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r12, 0)
- pushq %r13
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r13, 0)
#define FRAME_SIZE 32
- subq $FRAME_SIZE, %rsp
+ leaq -FRAME_SIZE(%rsp), %rsp
cfi_adjust_cfa_offset(FRAME_SIZE)
/* Stack frame:
@@ -112,7 +106,7 @@ __pthread_cond_wait:
movl %edx, 4(%rsp)
/* Unlock. */
-8: movl cond_futex(%rdi), %r12d
+8: movl cond_futex(%rdi), %edx
LOCK
#if cond_lock == 0
decl (%rdi)
@@ -125,9 +119,7 @@ __pthread_cond_wait:
4: callq __pthread_enable_asynccancel
movl %eax, (%rsp)
- movq 8(%rsp), %rdi
xorq %r10, %r10
- movq %r12, %rdx
cmpq $-1, dep_mutex(%rdi)
leaq cond_futex(%rdi), %rdi
movl $FUTEX_WAIT, %esi
@@ -145,7 +137,7 @@ __pthread_cond_wait:
movl $SYS_futex, %eax
syscall
- movl $1, %r13d
+ movl $1, %r8d
#ifdef __ASSUME_REQUEUE_PI
jmp 62f
#else
@@ -163,7 +155,7 @@ __pthread_cond_wait:
#else
orl %fs:PRIVATE_FUTEX, %esi
#endif
-60: xorl %r13d, %r13d
+60: xorl %r8d, %r8d
movl $SYS_futex, %eax
syscall
@@ -238,27 +230,18 @@ __pthread_cond_wait:
/* If requeue_pi is used the kernel performs the locking of the
mutex. */
11: movq 16(%rsp), %rdi
- testl %r13d, %r13d
+ testl %r8d, %r8d
jnz 18f
callq __pthread_mutex_cond_lock
-14: addq $FRAME_SIZE, %rsp
+14: leaq FRAME_SIZE(%rsp), %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)
- popq %r13
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r13)
- popq %r12
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r12)
-
/* We return the result of the mutex_lock operation. */
retq
- cfi_adjust_cfa_offset(16 + FRAME_SIZE)
- cfi_rel_offset(%r12, FRAME_SIZE + 8)
- cfi_rel_offset(%r13, FRAME_SIZE)
+ cfi_adjust_cfa_offset(FRAME_SIZE)
18: callq __pthread_mutex_cond_lock_adjust
xorl %eax, %eax
@@ -285,7 +268,11 @@ __pthread_cond_wait:
movl $LLL_PRIVATE, %eax
movl $LLL_SHARED, %esi
cmovne %eax, %esi
+ /* The call preserves %rdx. */
callq __lll_unlock_wake
+#if cond_lock != 0
+ subq $cond_lock, %rdi
+#endif
jmp 4b
/* Locking in loop failed. */
@@ -349,11 +336,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
__condvar_cleanup1:
/* Stack frame:
- rsp + 48
- +--------------------------+
- rsp + 40 | %r12 |
- +--------------------------+
- rsp + 32 | %r13 |
+ rsp + 32
+--------------------------+
rsp + 24 | unused |
+--------------------------+
@@ -410,7 +393,7 @@ __condvar_cleanup1:
3: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi)
/* Wake up a thread which wants to destroy the condvar object. */
- xorq %r12, %r12
+ xorl %ecx, %ecx
cmpq $0xffffffffffffffff, total_seq(%rdi)
jne 4f
movl cond_nwaiters(%rdi), %eax
@@ -433,7 +416,7 @@ __condvar_cleanup1:
movl $SYS_futex, %eax
syscall
subq $cond_nwaiters, %rdi
- movl $1, %r12d
+ movl $1, %ecx
4: LOCK
#if cond_lock == 0
@@ -449,10 +432,11 @@ __condvar_cleanup1:
movl $LLL_PRIVATE, %eax
movl $LLL_SHARED, %esi
cmovne %eax, %esi
+ /* The call preserves %rcx. */
callq __lll_unlock_wake
/* Wake up all waiters to make sure no signal gets lost. */
-2: testq %r12, %r12
+2: testl %ecx, %ecx
jnz 5f
addq $cond_futex, %rdi
cmpq $-1, dep_mutex-cond_futex(%rdi)
@@ -474,8 +458,6 @@ __condvar_cleanup1:
callq __pthread_mutex_cond_lock
movq 24(%rsp), %rdi
- movq 40(%rsp), %r12
- movq 32(%rsp), %r13
.LcallUR:
call _Unwind_Resume@PLT
hlt
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
index 95762834d3..0291beb169 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
@@ -65,34 +65,9 @@ sem_timedwait:
retq
/* Check whether the timeout value is valid. */
-1: pushq %r12
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r12, 0)
- pushq %r13
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r13, 0)
- pushq %r14
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r14, 0)
-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
-# define STACKFRAME 8
-#else
-# define STACKFRAME 24
-#endif
- subq $STACKFRAME, %rsp
- cfi_adjust_cfa_offset(STACKFRAME)
-
- movq %rdi, %r12
- movq %rsi, %r13
-
- /* Check for invalid nanosecond field. */
- cmpq $1000000000, 8(%r13)
- movl $EINVAL, %r14d
+1: cmpq $1000000000, 8(%rsi)
jae 6f
- LOCK
- addq $1, NWAITERS(%r12)
-
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
# ifdef PIC
cmpl $0, __have_futex_clock_realtime(%rip)
@@ -102,15 +77,22 @@ sem_timedwait:
je .Lreltmo
#endif
+ /* This push is only needed to store the sem_t pointer for the
+ exception handler. */
+ pushq %rdi
+ cfi_adjust_cfa_offset(8)
+
+ movq %rsi, %r10
+
+ LOCK
+ addq $1, NWAITERS(%rdi)
+
.LcleanupSTART:
13: call __pthread_enable_asynccancel
- movl %eax, (%rsp)
+ movl %eax, %r8d
- movq %r13, %r10
-#if VALUE == 0
- movq %r12, %rdi
-#else
- leaq VALUE(%r12), %rdi
+#if VALUE != 0
+ leaq VALUE(%rdi), %rdi
#endif
movl $0xffffffff, %r9d
movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi
@@ -118,22 +100,26 @@ sem_timedwait:
movl $SYS_futex, %eax
xorl %edx, %edx
syscall
- movq %rax, %r14
+ movq %rax, %r9
+#if VALUE != 0
+ leaq -VALUE(%rdi), %rdi
+#endif
- movl (%rsp), %edi
+ xchgq %r8, %rdi
call __pthread_disable_asynccancel
.LcleanupEND:
+ movq %r8, %rdi
- testq %r14, %r14
+ testq %r9, %r9
je 11f
- cmpq $-EWOULDBLOCK, %r14
+ cmpq $-EWOULDBLOCK, %r9
jne 3f
11:
#if VALUE == 0
- movl (%r12), %eax
+ movl (%rdi), %eax
#else
- movl VALUE(%r12), %eax
+ movl VALUE(%rdi), %eax
#endif
14: testl %eax, %eax
je 13b
@@ -141,49 +127,74 @@ sem_timedwait:
leaq -1(%rax), %rcx
LOCK
#if VALUE == 0
- cmpxchgl %ecx, (%r12)
+ cmpxchgl %ecx, (%rdi)
#else
- cmpxchgl %ecx, VALUE(%r12)
+ cmpxchgl %ecx, VALUE(%rdi)
#endif
jne 14b
-10: xorl %eax, %eax
+ xorl %eax, %eax
15: LOCK
- subq $1, NWAITERS(%r12)
+ subq $1, NWAITERS(%rdi)
- addq $STACKFRAME, %rsp
- cfi_adjust_cfa_offset(-STACKFRAME)
- popq %r14
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r14)
- popq %r13
+ leaq 8(%rsp), %rsp
cfi_adjust_cfa_offset(-8)
- cfi_restore(%r13)
- popq %r12
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r12)
retq
- cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
- cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
- cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
- cfi_rel_offset(%r14, STACKFRAME)
-3: negq %r14
-6:
+ cfi_adjust_cfa_offset(8)
+3: negq %r9
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
- movl %r14d, %fs:(%rdx)
+ movl %r9d, %fs:(%rdx)
#else
callq __errno_location@plt
- movl %r14d, (%rax)
+ movl %r9d, (%rax)
#endif
orl $-1, %eax
jmp 15b
+ cfi_adjust_cfa_offset(-8)
+6:
+#if USE___THREAD
+ movq errno@gottpoff(%rip), %rdx
+ movl $EINVAL, %fs:(%rdx)
+#else
+ callq __errno_location@plt
+ movl $EINVAL, (%rax)
+#endif
+
+ orl $-1, %eax
+
+ retq
+
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.Lreltmo:
+ pushq %r12
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r12, 0)
+ pushq %r13
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r13, 0)
+ pushq %r14
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r14, 0)
+
+#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
+# define STACKFRAME 8
+#else
+# define STACKFRAME 24
+#endif
+ subq $STACKFRAME, %rsp
+ cfi_adjust_cfa_offset(STACKFRAME)
+
+ movq %rdi, %r12
+ movq %rsi, %r13
+
+ LOCK
+ addq $1, NWAITERS(%r12)
+
7: xorl %esi, %esi
movq %rsp, %rdi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
@@ -202,7 +213,7 @@ sem_timedwait:
decq %rdi
5: testq %rdi, %rdi
movl $ETIMEDOUT, %r14d
- js 6b /* Time is already up. */
+ js 36f /* Time is already up. */
movq %rdi, (%rsp) /* Store relative timeout. */
movq %rsi, 8(%rsp)
@@ -235,7 +246,7 @@ sem_timedwait:
testq %r14, %r14
je 9f
cmpq $-EWOULDBLOCK, %r14
- jne 3b
+ jne 33f
9:
# if VALUE == 0
@@ -254,15 +265,54 @@ sem_timedwait:
cmpxchgl %ecx, VALUE(%r12)
# endif
jne 8b
- jmp 10b
+
+ xorl %eax, %eax
+
+45: LOCK
+ subq $1, NWAITERS(%r12)
+
+ addq $STACKFRAME, %rsp
+ cfi_adjust_cfa_offset(-STACKFRAME)
+ popq %r14
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r14)
+ popq %r13
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r13)
+ popq %r12
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r12)
+ retq
+
+ cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+ cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+ cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+ cfi_rel_offset(%r14, STACKFRAME)
+33: negq %r14
+36:
+#if USE___THREAD
+ movq errno@gottpoff(%rip), %rdx
+ movl %r14d, %fs:(%rdx)
+#else
+ callq __errno_location@plt
+ movl %r14d, (%rax)
#endif
+
+ orl $-1, %eax
+ jmp 45b
+#endif
+ cfi_endproc
.size sem_timedwait,.-sem_timedwait
.type sem_timedwait_cleanup,@function
sem_timedwait_cleanup:
+ cfi_startproc
+ cfi_adjust_cfa_offset(8)
+
+ movq (%rsp), %rdi
LOCK
- subq $1, NWAITERS(%r12)
+ subq $1, NWAITERS(%rdi)
movq %rax, %rdi
.LcallUR:
call _Unwind_Resume@PLT
@@ -272,6 +322,30 @@ sem_timedwait_cleanup:
.size sem_timedwait_cleanup,.-sem_timedwait_cleanup
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+ .type sem_timedwait_cleanup2,@function
+sem_timedwait_cleanup2:
+ cfi_startproc
+ cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+ cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+ cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+ cfi_rel_offset(%r14, STACKFRAME)
+
+ LOCK
+ subq $1, NWAITERS(%r12)
+ movq %rax, %rdi
+ movq STACKFRAME(%rsp), %r14
+ movq STACKFRAME+8(%rsp), %r13
+ movq STACKFRAME+16(%rsp), %r12
+.LcallUR2:
+ call _Unwind_Resume@PLT
+ hlt
+.LENDCODE2:
+ cfi_endproc
+ .size sem_timedwait_cleanup2,.-sem_timedwait_cleanup2
+#endif
+
+
.section .gcc_except_table,"a",@progbits
.LexceptSTART:
.byte DW_EH_PE_omit # @LPStart format
@@ -286,13 +360,19 @@ sem_timedwait_cleanup:
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.uleb128 .LcleanupSTART2-.LSTARTCODE
.uleb128 .LcleanupEND2-.LcleanupSTART2
- .uleb128 sem_timedwait_cleanup-.LSTARTCODE
+ .uleb128 sem_timedwait_cleanup2-.LSTARTCODE
.uleb128 0
#endif
.uleb128 .LcallUR-.LSTARTCODE
.uleb128 .LENDCODE-.LcallUR
.uleb128 0
.uleb128 0
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+ .uleb128 .LcallUR2-.LSTARTCODE
+ .uleb128 .LENDCODE2-.LcallUR2
+ .uleb128 0
+ .uleb128 0
+#endif
.Lcstend:
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
index a01d745a17..2cf6ec10a4 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
@@ -61,16 +61,13 @@ sem_wait:
xorl %eax, %eax
retq
-1: pushq %r12
+ /* This push is only needed to store the sem_t pointer for the
+ exception handler. */
+1: pushq %rdi
cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r12, 0)
- pushq %r13
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r13, 0)
- movq %rdi, %r13
LOCK
- addq $1, NWAITERS(%r13)
+ addq $1, NWAITERS(%rdi)
.LcleanupSTART:
6: call __pthread_enable_asynccancel
@@ -78,7 +75,6 @@ sem_wait:
xorq %r10, %r10
movl $SYS_futex, %eax
- movq %r13, %rdi
#if FUTEX_WAIT == 0
movl PRIVATE(%rdi), %esi
#else
@@ -87,22 +83,23 @@ sem_wait:
#endif
xorl %edx, %edx
syscall
- movq %rax, %r12
+ movq %rax, %rcx
- movl %r8d, %edi
+ xchgq %r8, %rdi
call __pthread_disable_asynccancel
.LcleanupEND:
+ movq %r8, %rdi
- testq %r12, %r12
+ testq %rcx, %rcx
je 3f
- cmpq $-EWOULDBLOCK, %r12
+ cmpq $-EWOULDBLOCK, %rcx
jne 4f
3:
#if VALUE == 0
- movl (%r13), %eax
+ movl (%rdi), %eax
#else
- movl VALUE(%r13), %eax
+ movl VALUE(%rdi), %eax
#endif
5: testl %eax, %eax
je 6b
@@ -110,50 +107,43 @@ sem_wait:
leal -1(%rax), %edx
LOCK
#if VALUE == 0
- cmpxchgl %edx, (%r13)
+ cmpxchgl %edx, (%rdi)
#else
- cmpxchgl %edx, VALUE(%r13)
+ cmpxchgl %edx, VALUE(%rdi)
#endif
jne 5b
- LOCK
- subq $1, NWAITERS(%r13)
-
xorl %eax, %eax
-9: popq %r13
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r13)
- popq %r12
+9: LOCK
+ subq $1, NWAITERS(%rdi)
+
+ leaq 8(%rsp), %rsp
cfi_adjust_cfa_offset(-8)
- cfi_restore(%r12)
retq
- cfi_adjust_cfa_offset(2 * 8)
- cfi_rel_offset(%r12, 8)
- cfi_rel_offset(%r13, 0)
-4: negq %r12
+ cfi_adjust_cfa_offset(8)
+4: negq %rcx
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
- movl %r12d, %fs:(%rdx)
+ movl %ecx, %fs:(%rdx)
#else
+# error "not supported. %rcx and %rdi must be preserved"
callq __errno_location@plt
- movl %r12d, (%rax)
+ movl %ecx, (%rax)
#endif
orl $-1, %eax
- LOCK
- subq $1, NWAITERS(%r13)
-
jmp 9b
.size sem_wait,.-sem_wait
.type sem_wait_cleanup,@function
sem_wait_cleanup:
+ movq (%rsp), %rdi
LOCK
- subq $1, NWAITERS(%r13)
+ subq $1, NWAITERS(%rdi)
movq %rax, %rdi
.LcallUR:
call _Unwind_Resume@PLT
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h b/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
index 3e741da794..1e92de1dcc 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2006, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
@@ -25,6 +25,10 @@
#if !defined NOT_IN_libc || defined IS_IN_libpthread || defined IS_IN_librt
+/* The code to disable cancellation depends on the fact that the called
+ functions are special. They don't modify registers other than %rax
+ and %r11 if they return. Therefore we don't have to preserve other
+ registers around these calls. */
# undef PSEUDO
# define PSEUDO(name, syscall_name, args) \
.text; \
@@ -40,60 +44,23 @@
ret; \
.size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
L(pseudo_cancel): \
- /* Save registers that might get destroyed. */ \
- SAVESTK_##args \
- PUSHARGS_##args \
+ /* We always have to align the stack before calling a function. */ \
+ subq $8, %rsp; cfi_adjust_cfa_offset (8); \
CENABLE \
- /* Restore registers. */ \
- POPARGS_##args \
/* The return value from CENABLE is argument for CDISABLE. */ \
movq %rax, (%rsp); \
- movl $SYS_ify (syscall_name), %eax; \
- syscall; \
+ DO_CALL (syscall_name, args); \
movq (%rsp), %rdi; \
/* Save %rax since it's the error code from the syscall. */ \
- movq %rax, 8(%rsp); \
+ movq %rax, %rdx; \
CDISABLE \
- movq 8(%rsp), %rax; \
- RESTSTK_##args \
+ movq %rdx, %rax; \
+ addq $8,%rsp; cfi_adjust_cfa_offset (-8); \
cmpq $-4095, %rax; \
jae SYSCALL_ERROR_LABEL; \
L(pseudo_end):
-# define PUSHARGS_0 /* Nothing. */
-# define PUSHARGS_1 PUSHARGS_0 movq %rdi, 8(%rsp);
-# define PUSHARGS_2 PUSHARGS_1 movq %rsi, 16(%rsp);
-# define PUSHARGS_3 PUSHARGS_2 movq %rdx, 24(%rsp);
-# define PUSHARGS_4 PUSHARGS_3 movq %rcx, 32(%rsp);
-# define PUSHARGS_5 PUSHARGS_4 movq %r8, 40(%rsp);
-# define PUSHARGS_6 PUSHARGS_5 movq %r9, 48(%rsp);
-
-# define POPARGS_0 /* Nothing. */
-# define POPARGS_1 POPARGS_0 movq 8(%rsp), %rdi;
-# define POPARGS_2 POPARGS_1 movq 16(%rsp), %rsi;
-# define POPARGS_3 POPARGS_2 movq 24(%rsp), %rdx;
-# define POPARGS_4 POPARGS_3 movq 32(%rsp), %r10;
-# define POPARGS_5 POPARGS_4 movq 40(%rsp), %r8;
-# define POPARGS_6 POPARGS_5 movq 48(%rsp), %r9;
-
-/* We always have to align the stack before calling a function. */
-# define SAVESTK_0 subq $24, %rsp; cfi_adjust_cfa_offset (24);
-# define SAVESTK_1 SAVESTK_0
-# define SAVESTK_2 SAVESTK_1
-# define SAVESTK_3 subq $40, %rsp; cfi_adjust_cfa_offset (40);
-# define SAVESTK_4 SAVESTK_3
-# define SAVESTK_5 subq $56, %rsp; cfi_adjust_cfa_offset (56);
-# define SAVESTK_6 SAVESTK_5
-
-# define RESTSTK_0 addq $24,%rsp; cfi_adjust_cfa_offset (-24);
-# define RESTSTK_1 RESTSTK_0
-# define RESTSTK_2 RESTSTK_1
-# define RESTSTK_3 addq $40, %rsp; cfi_adjust_cfa_offset (-40);
-# define RESTSTK_4 RESTSTK_3
-# define RESTSTK_5 addq $56, %rsp; cfi_adjust_cfa_offset (-56);
-# define RESTSTK_6 RESTSTK_5
-
# ifdef IS_IN_libpthread
# define CENABLE call __pthread_enable_asynccancel;
# define CDISABLE call __pthread_disable_asynccancel;
diff --git a/nptl/sysdeps/x86_64/tcb-offsets.sym b/nptl/sysdeps/x86_64/tcb-offsets.sym
index 51f35c61cf..cf863752ee 100644
--- a/nptl/sysdeps/x86_64/tcb-offsets.sym
+++ b/nptl/sysdeps/x86_64/tcb-offsets.sym
@@ -16,3 +16,13 @@ VGETCPU_CACHE_OFFSET offsetof (tcbhead_t, vgetcpu_cache)
PRIVATE_FUTEX offsetof (tcbhead_t, private_futex)
#endif
RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse)
+
+-- Not strictly offsets, but these values are also used in the TCB.
+TCB_CANCELSTATE_BITMASK CANCELSTATE_BITMASK
+TCB_CANCELTYPE_BITMASK CANCELTYPE_BITMASK
+TCB_CANCELING_BITMASK CANCELING_BITMASK
+TCB_CANCELED_BITMASK CANCELED_BITMASK
+TCB_EXITING_BITMASK EXITING_BITMASK
+TCB_CANCEL_RESTMASK CANCEL_RESTMASK
+TCB_TERMINATED_BITMASK TERMINATED_BITMASK
+TCB_PTHREAD_CANCELED PTHREAD_CANCELED
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure
index ced0b31d0f..d1c4f7f501 100755
--- a/sysdeps/i386/configure
+++ b/sysdeps/i386/configure
@@ -1,13 +1,468 @@
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
# This file is generated from configure.in by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/i386.
-{ echo "$as_me:$LINENO: checking if gcc provides <cpuid.h>" >&5
-echo $ECHO_N "checking if gcc provides <cpuid.h>... $ECHO_C" >&6; }
-if test "${libc_cv_gcc_cpuid+set}" = set; then
+
+{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5
+echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; }
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Extract the first word of "grep ggrep" to use in msg output
+if test -z "$GREP"; then
+set dummy grep ggrep; ac_prog_name=$2
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_GREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+ # Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_GREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+GREP="$ac_cv_path_GREP"
+if test -z "$GREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5
+echo "${ECHO_T}$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ echo "$as_me:$LINENO: checking for egrep" >&5
+echo $ECHO_N "checking for egrep... $ECHO_C" >&6; }
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ # Extract the first word of "egrep" to use in msg output
+if test -z "$EGREP"; then
+set dummy egrep; ac_prog_name=$2
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_EGREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+ # Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_EGREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+EGREP="$ac_cv_path_EGREP"
+if test -z "$EGREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+
+ fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5
+echo "${ECHO_T}$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_cv_header_stdc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_stdc=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+if test "${ac_cv_header_cpuid_h+set}" = set; then
+ { echo "$as_me:$LINENO: checking for cpuid.h" >&5
+echo $ECHO_N "checking for cpuid.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_cpuid_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_cpuid_h" >&5
+echo "${ECHO_T}$ac_cv_header_cpuid_h" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking cpuid.h usability" >&5
+echo $ECHO_N "checking cpuid.h usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
#include <cpuid.h>
_ACEOF
rm -f conftest.$ac_objext
@@ -27,24 +482,103 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest.$ac_objext; then
- libc_cv_gcc_cpuid=yes
+ ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- libc_cv_gcc_cpuid=no
+ ac_header_compiler=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking cpuid.h presence" >&5
+echo $ECHO_N "checking cpuid.h presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <cpuid.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null && {
+ test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ }; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
fi
-{ echo "$as_me:$LINENO: result: $libc_cv_gcc_cpuid" >&5
-echo "${ECHO_T}$libc_cv_gcc_cpuid" >&6; }
-if test $libc_cv_gcc_cpuid != yes; then
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: cpuid.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: cpuid.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: present but cannot be compiled" >&5
+echo "$as_me: WARNING: cpuid.h: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: cpuid.h: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: cpuid.h: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: cpuid.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: cpuid.h: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: cpuid.h: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: cpuid.h: in the future, the compiler will take precedence" >&2;}
+
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for cpuid.h" >&5
+echo $ECHO_N "checking for cpuid.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_cpuid_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_header_cpuid_h=$ac_header_preproc
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_cpuid_h" >&5
+echo "${ECHO_T}$ac_cv_header_cpuid_h" >&6; }
+
+fi
+if test $ac_cv_header_cpuid_h = yes; then
+ :
+else
{ { echo "$as_me:$LINENO: error: gcc must provide the <cpuid.h> header" >&5
echo "$as_me: error: gcc must provide the <cpuid.h> header" >&2;}
{ (exit 1); exit 1; }; }
fi
+
+
{ echo "$as_me:$LINENO: checking if -g produces usable source locations for assembler-with-cpp" >&5
echo $ECHO_N "checking if -g produces usable source locations for assembler-with-cpp... $ECHO_C" >&6; }
if test "${libc_cv_cpp_asm_debuginfo+set}" = set; then
diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in
index 800f928fbd..12dceaf844 100644
--- a/sysdeps/i386/configure.in
+++ b/sysdeps/i386/configure.in
@@ -1,12 +1,8 @@
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/i386.
-AC_CACHE_CHECK([if gcc provides <cpuid.h>], libc_cv_gcc_cpuid, [dnl
-AC_COMPILE_IFELSE([#include <cpuid.h>], libc_cv_gcc_cpuid=yes,
- libc_cv_gcc_cpuid=no)])
-if test $libc_cv_gcc_cpuid != yes; then
- AC_MSG_ERROR([gcc must provide the <cpuid.h> header])
-fi
+AC_HEADER_CHECK([cpuid.h], ,
+ [AC_MSG_ERROR([gcc must provide the <cpuid.h> header])])
AC_CACHE_CHECK(if -g produces usable source locations for assembler-with-cpp,
libc_cv_cpp_asm_debuginfo, [dnl
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 33d98c36e6..e1553b284e 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -2,3 +2,14 @@ ifeq ($(subdir),csu)
aux += init-arch
gen-as-const-headers += ifunc-defines.sym
endif
+
+ifeq ($(subdir),string)
+ifeq (yes,$(config-cflags-sse4))
+sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
+CFLAGS-strcspn-c.c += -msse4
+CFLAGS-strpbrk-c.c += -msse4
+CFLAGS-strspn-c.c += -msse4
+CFLAGS-strstr.c += -msse4
+CFLAGS-strcasestr.c += -msse4
+endif
+endif
diff --git a/sysdeps/i386/i686/multiarch/strcasestr-c.c b/sysdeps/i386/i686/multiarch/strcasestr-c.c
new file mode 100644
index 0000000000..0d52b0e47a
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcasestr-c.c
@@ -0,0 +1,2 @@
+#define __strcasestr_sse2 __strcasestr_ia32
+#include <sysdeps/x86_64/multiarch/strcasestr-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strcasestr.c b/sysdeps/i386/i686/multiarch/strcasestr.c
new file mode 100644
index 0000000000..511bb29ede
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcasestr.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/strcasestr.c>
diff --git a/sysdeps/i386/i686/multiarch/strcspn-c.c b/sysdeps/i386/i686/multiarch/strcspn-c.c
new file mode 100644
index 0000000000..6d61e190a8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcspn-c.c
@@ -0,0 +1,2 @@
+#define __strcspn_sse2 __strcspn_ia32
+#include <sysdeps/x86_64/multiarch/strcspn-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strcspn.S b/sysdeps/i386/i686/multiarch/strcspn.S
new file mode 100644
index 0000000000..73e7eb45a8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strcspn.S
@@ -0,0 +1,114 @@
+/* Multiple versions of strcspn
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <config.h>
+
+#ifdef HAVE_SSE4_SUPPORT
+
+#include <sysdep.h>
+#include <ifunc-defines.h>
+
+#ifdef USE_AS_STRPBRK
+#define STRCSPN_SSE42 __strpbrk_sse42
+#define STRCSPN_IA32 __strpbrk_ia32
+#define __GI_STRCSPN __GI_strpbrk
+#else
+#ifndef STRCSPN
+#define STRCSPN strcspn
+#define STRCSPN_SSE42 __strcspn_sse42
+#define STRCSPN_IA32 __strcspn_ia32
+#define __GI_STRCSPN __GI_strcspn
+#endif
+#endif
+
+/* Define multiple versions only for the definition in libc. Don't
+ define multiple versions for strpbrk in static library since we
+ need strpbrk before the initialization happened. */
+#if (defined SHARED || !defined USE_AS_STRPBRK) && !defined NOT_IN_libc
+# ifdef SHARED
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+
+ .text
+ENTRY(STRCSPN)
+ .type STRCSPN, @gnu_indirect_function
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features
+1: leal STRCSPN_IA32@GOTOFF(%ebx), %eax
+ testl $(1<<20), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal STRCSPN_SSE42@GOTOFF(%ebx), %eax
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4);
+ cfi_restore (ebx)
+ ret
+END(STRCSPN)
+# else
+ .text
+ENTRY(STRCSPN)
+ .type STRCSPN, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal STRCSPN_IA32, %eax
+ testl $(1<<20), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET+__cpu_features
+ jz 2f
+ leal STRCSPN_SSE42, %eax
+2: ret
+END(STRCSPN)
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type STRCSPN_IA32, @function; \
+ .globl STRCSPN_IA32; \
+ .p2align 4; \
+ STRCSPN_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size STRCSPN_IA32, .-STRCSPN_IA32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+ they will be called without setting up EBX needed for PLT which is
+ used by IFUNC. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCSPN; __GI_STRCSPN = STRCSPN_IA32
+#endif
+
+#endif /* HAVE_SSE4_SUPPORT */
+
+#ifdef USE_AS_STRPBRK
+#include "../../strpbrk.S"
+#else
+#include "../../strcspn.S"
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strlen.S b/sysdeps/i386/i686/multiarch/strlen.S
new file mode 100644
index 0000000000..0c1e8646ff
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strlen.S
@@ -0,0 +1,154 @@
+/* Multiple versions of strlen
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <ifunc-defines.h>
+
+/* Define multiple versions only for the definition in libc and for the
+ DSO. In static binaries, we need strlen before the initialization
+ happened. */
+#if defined SHARED && !defined NOT_IN_libc
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+
+ .text
+ENTRY(strlen)
+ .type strlen, @gnu_indirect_function
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features
+1: leal __strlen_ia32@GOTOFF(%ebx), %eax
+ testl $(1<<26), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __strlen_sse2@GOTOFF(%ebx), %eax
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4);
+ cfi_restore (ebx)
+ ret
+END(strlen)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define RETURN popl %esi; CFI_POP (esi); ret
+
+ .text
+ENTRY (__strlen_sse2)
+/*
+ * This implementation uses SSE instructions to compare up to 16 bytes
+ * at a time looking for the end of string (null char).
+ */
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (%esi, 0)
+ mov 8(%esp), %eax
+ mov %eax, %ecx
+ pxor %xmm0, %xmm0 /* 16 null chars */
+ mov %eax, %esi
+ and $15, %ecx
+ jz 1f /* string is 16 byte aligned */
+
+ /*
+ * Unaligned case. Round down to 16-byte boundary before comparing
+ * 16 bytes for a null char. The code then compensates for any extra chars
+ * preceding the start of the string.
+ */
+ and $-16, %esi
+
+ pcmpeqb (%esi), %xmm0
+ lea 16(%eax), %esi
+ pmovmskb %xmm0, %edx
+
+ shr %cl, %edx /* Compensate for bytes preceding the string */
+ test %edx, %edx
+ jnz 2f
+ sub %ecx, %esi /* no null, adjust to next 16-byte boundary */
+ pxor %xmm0, %xmm0 /* clear xmm0, may have been changed... */
+
+ .p2align 4
+1: /* 16 byte aligned */
+ pcmpeqb (%esi), %xmm0 /* look for null bytes */
+ pmovmskb %xmm0, %edx /* move each byte mask of %xmm0 to edx */
+
+ add $16, %esi /* prepare to search next 16 bytes */
+ test %edx, %edx /* if no null byte, %edx must be 0 */
+ jnz 2f /* found a null */
+
+ pcmpeqb (%esi), %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %esi
+ test %edx, %edx
+ jnz 2f
+
+ pcmpeqb (%esi), %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %esi
+ test %edx, %edx
+ jnz 2f
+
+ pcmpeqb (%esi), %xmm0
+ pmovmskb %xmm0, %edx
+ add $16, %esi
+ test %edx, %edx
+ jz 1b
+
+2:
+ neg %eax
+ lea -16(%eax, %esi), %eax /* calculate exact offset */
+ bsf %edx, %ecx /* Least significant 1 bit is index of null */
+ add %ecx, %eax
+ popl %esi
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (%esi)
+ ret
+
+END (__strlen_sse2)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __strlen_ia32, @function; \
+ .globl __strlen_ia32; \
+ .p2align 4; \
+ __strlen_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strlen_ia32, .-__strlen_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+ they will be called without setting up EBX needed for PLT which is
+ used by IFUNC. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strlen; __GI_strlen = __strlen_ia32
+#endif
+
+#include "../../i586/strlen.S"
diff --git a/sysdeps/i386/i686/multiarch/strpbrk-c.c b/sysdeps/i386/i686/multiarch/strpbrk-c.c
new file mode 100644
index 0000000000..5db62053b3
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strpbrk-c.c
@@ -0,0 +1,2 @@
+#define __strpbrk_sse2 __strpbrk_ia32
+#include <sysdeps/x86_64/multiarch/strpbrk-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strpbrk.S b/sysdeps/i386/i686/multiarch/strpbrk.S
new file mode 100644
index 0000000000..ed5bca6a94
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strpbrk.S
@@ -0,0 +1,3 @@
+#define STRCSPN strpbrk
+#define USE_AS_STRPBRK
+#include "strcspn.S"
diff --git a/sysdeps/i386/i686/multiarch/strspn-c.c b/sysdeps/i386/i686/multiarch/strspn-c.c
new file mode 100644
index 0000000000..bea09dea71
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strspn-c.c
@@ -0,0 +1,2 @@
+#define __strspn_sse2 __strspn_ia32
+#include <sysdeps/x86_64/multiarch/strspn-c.c>
diff --git a/sysdeps/i386/i686/multiarch/strspn.S b/sysdeps/i386/i686/multiarch/strspn.S
new file mode 100644
index 0000000000..f306d2d1fb
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strspn.S
@@ -0,0 +1,95 @@
+/* Multiple versions of strspn
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <config.h>
+
+#ifdef HAVE_SSE4_SUPPORT
+
+#include <sysdep.h>
+#include <ifunc-defines.h>
+
+/* Define multiple versions only for the definition in libc. */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+
+ .text
+ENTRY(strspn)
+ .type strspn, @gnu_indirect_function
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features
+1: leal __strspn_ia32@GOTOFF(%ebx), %eax
+ testl $(1<<20), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __strspn_sse42@GOTOFF(%ebx), %eax
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4);
+ cfi_restore (ebx)
+ ret
+END(strspn)
+# else
+ .text
+ENTRY(strspn)
+ .type strspn, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal __strspn_ia32, %eax
+ testl $(1<<20), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET+__cpu_features
+ jz 2f
+ leal __strspn_sse42, %eax
+2: ret
+END(strspn)
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __strspn_ia32, @function; \
+ .globl __strspn_ia32; \
+ .p2align 4
+ __strspn_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __strspn_ia32, .-__strspn_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+ they will be called without setting up EBX needed for PLT which is
+ used by IFUNC. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_strspn; __GI_strspn = __strspn_ia32
+#endif
+
+#endif /* HAVE_SSE4_SUPPORT */
+
+#include "../../strspn.S"
diff --git a/sysdeps/i386/i686/multiarch/strstr-c.c b/sysdeps/i386/i686/multiarch/strstr-c.c
new file mode 100644
index 0000000000..7ef1157ce4
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strstr-c.c
@@ -0,0 +1,12 @@
+#include "init-arch.h"
+
+#define STRSTR __strstr_ia32
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name) \
+ __hidden_ver1 (__strstr_ia32, __GI_strstr, __strstr_ia32);
+
+#include "string/strstr.c"
+
+extern char *__strstr_sse42 (const char *, const char *);
+
+libc_ifunc (strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_ia32);
diff --git a/sysdeps/i386/i686/multiarch/strstr.c b/sysdeps/i386/i686/multiarch/strstr.c
new file mode 100644
index 0000000000..a97428c125
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strstr.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/multiarch/strstr.c>
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
index f252fc2c6c..5b66c62eb3 100644
--- a/sysdeps/x86_64/cacheinfo.c
+++ b/sysdeps/x86_64/cacheinfo.c
@@ -516,13 +516,15 @@ init_cacheinfo (void)
shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
}
+ unsigned int ebx_1;
+
#ifdef USE_MULTIARCH
eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
- ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
+ ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
#else
- __cpuid (1, eax, ebx, ecx, edx);
+ __cpuid (1, eax, ebx_1, ecx, edx);
#endif
#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
@@ -554,14 +556,46 @@ init_cacheinfo (void)
}
while (((eax >> 5) & 0x7) != level);
- threads = ((eax >> 14) & 0x3ff) + 1;
+ threads = (eax >> 14) & 0x3ff;
+
+ /* If max_cpuid >= 11, THREADS is the maximum number of
+ addressable IDs for logical processors sharing the
+ cache, instead of the maximum number of threads
+ sharing the cache. */
+ if (threads && max_cpuid >= 11)
+ {
+ /* Find the number of logical processors shipped in
+ one core and apply count mask. */
+ i = 0;
+ while (1)
+ {
+ __cpuid_count (11, i++, eax, ebx, ecx, edx);
+
+ int shipped = ebx & 0xff;
+ int type = ecx & 0xff0;
+ if (shipped == 0 || type == 0)
+ break;
+ else if (type == 0x200)
+ {
+ int count_mask;
+
+ /* Compute count mask. */
+ asm ("bsr %1, %0"
+ : "=r" (count_mask) : "g" (threads));
+ count_mask = ~(-1 << (count_mask + 1));
+ threads = (shipped - 1) & count_mask;
+ break;
+ }
+ }
+ }
+ threads += 1;
}
else
{
intel_bug_no_cache_info:
/* Assume that all logical threads share the highest cache level. */
- threads = (ebx >> 16) & 0xff;
+ threads = (ebx_1 >> 16) & 0xff;
}
/* Cap usage of highest cache level to the number of supported
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 20da6956f1..f9c60ad5cf 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -146,247 +146,17 @@ L(have_avx):
2: movl %eax, L(have_avx)(%rip)
cmpl $0, %eax
-1: js L(no_avx1)
+1: js L(no_avx)
- /* This is to support AVX audit modules. */
- vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
- vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
- vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
- vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
- vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
- vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
- vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
- vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+# define RESTORE_AVX
+# include "dl-trampoline.h"
- /* Save xmm0-xmm7 registers to detect if any of them are
- changed by audit module. */
- vmovdqa %xmm0, (LR_SIZE)(%rsp)
- vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
- vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
- vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
- vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
- vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
- vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
- vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
-
-L(no_avx1):
-# endif
-
- movq %rsp, %rcx # La_x86_64_regs pointer to %rcx.
- movq 48(%rbx), %rdx # Load return address if needed.
- movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
- movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
- leaq 16(%rbx), %r8
- call _dl_profile_fixup # Call resolver.
-
- movq %rax, %r11 # Save return value.
-
- movq 8(%rbx), %rax # Get back register content.
- movq LR_RDX_OFFSET(%rsp), %rdx
- movq LR_R8_OFFSET(%rsp), %r8
- movq LR_R9_OFFSET(%rsp), %r9
-
- movaps (LR_XMM_OFFSET)(%rsp), %xmm0
- movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
- movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
- movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
- movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
- movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
- movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
- movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
-
-# ifdef HAVE_AVX_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx2)
-
- /* Check if any xmm0-xmm7 registers are changed by audit
- module. */
- vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
-
-1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
- vpmovmskb %xmm8, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
-
-L(no_avx2):
-1:
-# endif
- movq 16(%rbx), %r10 # Anything in framesize?
- testq %r10, %r10
- jns 3f
-
- /* There's nothing in the frame size, so there
- will be no call to the _dl_call_pltexit. */
-
- /* Get back registers content. */
- movq LR_RCX_OFFSET(%rsp), %rcx
- movq LR_RSI_OFFSET(%rsp), %rsi
- movq LR_RDI_OFFSET(%rsp), %rdi
-
- movq %rbx, %rsp
- movq (%rsp), %rbx
- cfi_restore(rbx)
- cfi_def_cfa_register(%rsp)
-
- addq $48, %rsp # Adjust the stack to the return value
- # (eats the reloc index and link_map)
- cfi_adjust_cfa_offset(-48)
- jmp *%r11 # Jump to function address.
-
-3:
- cfi_adjust_cfa_offset(48)
- cfi_rel_offset(%rbx, 0)
- cfi_def_cfa_register(%rbx)
-
- /* At this point we need to prepare new stack for the function
- which has to be called. We copy the original stack to a
- temporary buffer of the size specified by the 'framesize'
- returned from _dl_profile_fixup */
-
- leaq LR_RSP_OFFSET(%rbx), %rsi # stack
- addq $8, %r10
- andq $0xfffffffffffffff0, %r10
- movq %r10, %rcx
- subq %r10, %rsp
- movq %rsp, %rdi
- shrq $3, %rcx
- rep
- movsq
-
- movq 24(%rdi), %rcx # Get back register content.
- movq 32(%rdi), %rsi
- movq 40(%rdi), %rdi
-
- call *%r11
-
- mov 24(%rbx), %rsp # Drop the copied stack content
-
- /* Now we have to prepare the La_x86_64_retval structure for the
- _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
- so we just need to allocate the sizeof(La_x86_64_retval) space on
- the stack, since the alignment has already been taken care of. */
-# ifdef HAVE_AVX_SUPPORT
- /* sizeof(La_x86_64_retval). Need extra space for 2 SSE
- registers to detect if xmm0/xmm1 registers are changed
- by audit module. */
- subq $(LRV_SIZE + XMM_SIZE*2), %rsp
-# else
- subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
-# endif
- movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
-
- /* Fill in the La_x86_64_retval structure. */
- movq %rax, LRV_RAX_OFFSET(%rcx)
- movq %rdx, LRV_RDX_OFFSET(%rcx)
-
- movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
- movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
-
-# ifdef HAVE_AVX_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx3)
-
- /* This is to support AVX audit modules. */
- vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
- vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
-
- /* Save xmm0/xmm1 registers to detect if they are changed
- by audit module. */
- vmovdqa %xmm0, (LRV_SIZE)(%rcx)
- vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
-
-L(no_avx3):
-# endif
-
- fstpt LRV_ST0_OFFSET(%rcx)
- fstpt LRV_ST1_OFFSET(%rcx)
-
- movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
- movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
- movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
- call _dl_call_pltexit
-
- /* Restore return registers. */
- movq LRV_RAX_OFFSET(%rsp), %rax
- movq LRV_RDX_OFFSET(%rsp), %rdx
-
- movaps LRV_XMM0_OFFSET(%rsp), %xmm0
- movaps LRV_XMM1_OFFSET(%rsp), %xmm1
-
-# ifdef HAVE_AVX_SUPPORT
- cmpl $0, L(have_avx)(%rip)
- js L(no_avx4)
-
- /* Check if xmm0/xmm1 registers are changed by audit module. */
- vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
- vpmovmskb %xmm2, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
-
-1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
- vpmovmskb %xmm2, %esi
- cmpl $0xffff, %esi
- jne 1f
- vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
-
-L(no_avx4):
-1:
+ .align 16
+L(no_avx):
# endif
- fldt LRV_ST1_OFFSET(%rsp)
- fldt LRV_ST0_OFFSET(%rsp)
-
- movq %rbx, %rsp
- movq (%rsp), %rbx
- cfi_restore(rbx)
- cfi_def_cfa_register(%rsp)
-
- addq $48, %rsp # Adjust the stack to the return value
- # (eats the reloc index and link_map)
- cfi_adjust_cfa_offset(-48)
- retq
+# undef RESTORE_AVX
+# include "dl-trampoline.h"
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
new file mode 100644
index 0000000000..5d49ed4408
--- /dev/null
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -0,0 +1,269 @@
+/* Partial PLT profile trampoline to save and restore x86-64 vector
+ registers.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef RESTORE_AVX
+ /* This is to support AVX audit modules. */
+ vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
+ vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+ vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+ vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+ vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+ vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+ vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+ vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+
+ /* Save xmm0-xmm7 registers to detect if any of them are
+ changed by audit module. */
+ vmovdqa %xmm0, (LR_SIZE)(%rsp)
+ vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
+ vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
+ vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
+ vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
+ vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
+ vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
+ vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
+#endif
+
+ movq %rsp, %rcx # La_x86_64_regs pointer to %rcx.
+ movq 48(%rbx), %rdx # Load return address if needed.
+ movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
+ movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
+ leaq 16(%rbx), %r8
+ call _dl_profile_fixup # Call resolver.
+
+ movq %rax, %r11 # Save return value.
+
+ movq 8(%rbx), %rax # Get back register content.
+ movq LR_RDX_OFFSET(%rsp), %rdx
+ movq LR_R8_OFFSET(%rsp), %r8
+ movq LR_R9_OFFSET(%rsp), %r9
+
+ movaps (LR_XMM_OFFSET)(%rsp), %xmm0
+ movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
+ movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
+ movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
+ movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
+ movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
+ movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
+ movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7
+
+#ifdef RESTORE_AVX
+ /* Check if any xmm0-xmm7 registers are changed by audit
+ module. */
+ vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
+ vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
+ vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
+ vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
+ vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
+ vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
+ vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
+ vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
+
+1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
+ vpmovmskb %xmm8, %esi
+ cmpl $0xffff, %esi
+ je 2f
+ vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
+ jmp 1f
+2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
+ vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
+
+1:
+#endif
+ movq 16(%rbx), %r10 # Anything in framesize?
+ testq %r10, %r10
+ jns 3f
+
+ /* There's nothing in the frame size, so there
+ will be no call to the _dl_call_pltexit. */
+
+ /* Get back registers content. */
+ movq LR_RCX_OFFSET(%rsp), %rcx
+ movq LR_RSI_OFFSET(%rsp), %rsi
+ movq LR_RDI_OFFSET(%rsp), %rdi
+
+ movq %rbx, %rsp
+ movq (%rsp), %rbx
+ cfi_restore(rbx)
+ cfi_def_cfa_register(%rsp)
+
+ addq $48, %rsp # Adjust the stack to the return value
+ # (eats the reloc index and link_map)
+ cfi_adjust_cfa_offset(-48)
+ jmp *%r11 # Jump to function address.
+
+3:
+ cfi_adjust_cfa_offset(48)
+ cfi_rel_offset(%rbx, 0)
+ cfi_def_cfa_register(%rbx)
+
+ /* At this point we need to prepare new stack for the function
+ which has to be called. We copy the original stack to a
+ temporary buffer of the size specified by the 'framesize'
+ returned from _dl_profile_fixup */
+
+ leaq LR_RSP_OFFSET(%rbx), %rsi # stack
+ addq $8, %r10
+ andq $0xfffffffffffffff0, %r10
+ movq %r10, %rcx
+ subq %r10, %rsp
+ movq %rsp, %rdi
+ shrq $3, %rcx
+ rep
+ movsq
+
+ movq 24(%rdi), %rcx # Get back register content.
+ movq 32(%rdi), %rsi
+ movq 40(%rdi), %rdi
+
+ call *%r11
+
+ mov 24(%rbx), %rsp # Drop the copied stack content
+
+ /* Now we have to prepare the La_x86_64_retval structure for the
+ _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
+ so we just need to allocate the sizeof(La_x86_64_retval) space on
+ the stack, since the alignment has already been taken care of. */
+# ifdef RESTORE_AVX
+ /* sizeof(La_x86_64_retval). Need extra space for 2 SSE
+ registers to detect if xmm0/xmm1 registers are changed
+ by audit module. */
+ subq $(LRV_SIZE + XMM_SIZE*2), %rsp
+# else
+ subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
+# endif
+ movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
+
+ /* Fill in the La_x86_64_retval structure. */
+ movq %rax, LRV_RAX_OFFSET(%rcx)
+ movq %rdx, LRV_RDX_OFFSET(%rcx)
+
+ movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
+ movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
+
+# ifdef RESTORE_AVX
+ /* This is to support AVX audit modules. */
+ vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
+ vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
+
+ /* Save xmm0/xmm1 registers to detect if they are changed
+ by audit module. */
+ vmovdqa %xmm0, (LRV_SIZE)(%rcx)
+ vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
+# endif
+
+ fstpt LRV_ST0_OFFSET(%rcx)
+ fstpt LRV_ST1_OFFSET(%rcx)
+
+ movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
+ movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
+ movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
+ call _dl_call_pltexit
+
+ /* Restore return registers. */
+ movq LRV_RAX_OFFSET(%rsp), %rax
+ movq LRV_RDX_OFFSET(%rsp), %rdx
+
+ movaps LRV_XMM0_OFFSET(%rsp), %xmm0
+ movaps LRV_XMM1_OFFSET(%rsp), %xmm1
+
+# ifdef RESTORE_AVX
+ /* Check if xmm0/xmm1 registers are changed by audit module. */
+ vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
+ vpmovmskb %xmm2, %esi
+ cmpl $0xffff, %esi
+ jne 1f
+ vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
+
+1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
+ vpmovmskb %xmm2, %esi
+ cmpl $0xffff, %esi
+ jne 1f
+ vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
+
+1:
+# endif
+
+ fldt LRV_ST1_OFFSET(%rsp)
+ fldt LRV_ST0_OFFSET(%rsp)
+
+ movq %rbx, %rsp
+ movq (%rsp), %rbx
+ cfi_restore(rbx)
+ cfi_def_cfa_register(%rsp)
+
+ addq $48, %rsp # Adjust the stack to the return value
+ # (eats the reloc index and link_map)
+ cfi_adjust_cfa_offset(-48)
+ retq
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index b066402204..0ded3b3261 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -4,7 +4,7 @@ gen-as-const-headers += ifunc-defines.sym
endif
ifeq ($(subdir),string)
-sysdep_routines += stpncpy-c strncpy-c
+sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S
index d4f265f430..08fd8769fc 100644
--- a/sysdeps/x86_64/multiarch/rawmemchr.S
+++ b/sysdeps/x86_64/multiarch/rawmemchr.S
@@ -38,6 +38,7 @@ END(rawmemchr)
strong_alias (rawmemchr, __rawmemchr)
+ .section .text.sse4.2,"ax",@progbits
.align 16
.type __rawmemchr_sse42, @function
__rawmemchr_sse42:
diff --git a/sysdeps/x86_64/multiarch/strcmp-ssse3.S b/sysdeps/x86_64/multiarch/strcmp-ssse3.S
new file mode 100644
index 0000000000..98cecb8942
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp-ssse3.S
@@ -0,0 +1,3 @@
+#define USE_SSSE3 1
+#define STRCMP __strcmp_ssse3
+#include "../strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index 1a315737af..05adf1e2e6 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -34,6 +34,7 @@
mov %r9, %r11
#define STRCMP_SSE42 __strncmp_sse42
+#define STRCMP_SSSE3 __strncmp_ssse3
#define STRCMP_SSE2 __strncmp_sse2
#define __GI_STRCMP __GI_strncmp
#else
@@ -41,6 +42,7 @@
#ifndef STRCMP
#define STRCMP strcmp
#define STRCMP_SSE42 __strcmp_sse42
+#define STRCMP_SSSE3 __strcmp_ssse3
#define STRCMP_SSE2 __strcmp_sse2
#define __GI_STRCMP __GI_strcmp
#endif
@@ -60,10 +62,14 @@ ENTRY(STRCMP)
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
-1: leaq STRCMP_SSE2(%rip), %rax
- testl $(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
- jz 2f
+1:
leaq STRCMP_SSE42(%rip), %rax
+ testl $(1<<20), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
+ jnz 2f
+ leaq STRCMP_SSSE3(%rip), %rax
+ testl $(1<<9), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip)
+ jnz 2f
+ leaq STRCMP_SSE2(%rip), %rax
2: ret
END(STRCMP)
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index 4512267d3f..daeebe1bf5 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -86,11 +86,13 @@ STRCSPN_SSE42 (const char *s, const char *a)
const char *aligned;
__m128i mask;
+ /* Fake initialization. gcc otherwise will warn. */
+ asm ("" : "=xm" (mask));
int offset = (int) ((size_t) a & 15);
if (offset != 0)
{
/* Load masks. */
- aligned = (const char *) ((size_t) a & 0xfffffffffffffff0L);
+ aligned = (const char *) ((size_t) a & -16L);
__m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
switch (offset)
@@ -229,7 +231,7 @@ STRCSPN_SSE42 (const char *s, const char *a)
if (offset != 0)
{
/* Check partial string. */
- aligned = (const char *) ((size_t) s & 0xfffffffffffffff0L);
+ aligned = (const char *) ((size_t) s & -16L);
__m128i value = _mm_load_si128 ((__m128i *) aligned);
switch (offset)
diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S
index 82b03ccc28..4342c6cdab 100644
--- a/sysdeps/x86_64/multiarch/strlen.S
+++ b/sysdeps/x86_64/multiarch/strlen.S
@@ -40,6 +40,7 @@ ENTRY(strlen)
END(strlen)
+ .section .text.sse4.2,"ax",@progbits
.align 16
.type __strlen_sse42, @function
__strlen_sse42:
diff --git a/sysdeps/x86_64/multiarch/strncmp-ssse3.S b/sysdeps/x86_64/multiarch/strncmp-ssse3.S
new file mode 100644
index 0000000000..a320a3e949
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncmp-ssse3.S
@@ -0,0 +1,4 @@
+#define USE_SSSE3 1
+#define STRCMP __strncmp_ssse3
+#define USE_AS_STRNCMP
+#include "../strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 5b99f0d383..be9e8ac0a8 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -68,7 +68,7 @@ __strspn_sse42 (const char *s, const char *a)
if (offset != 0)
{
/* Load masks. */
- aligned = (const char *) ((size_t) a & 0xfffffffffffffff0L);
+ aligned = (const char *) ((size_t) a & -16L);
__m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
switch (offset)
@@ -207,7 +207,7 @@ __strspn_sse42 (const char *s, const char *a)
if (offset != 0)
{
/* Check partial string. */
- aligned = (const char *) ((size_t) s & 0xfffffffffffffff0L);
+ aligned = (const char *) ((size_t) s & -16L);
__m128i value = _mm_load_si128 ((__m128i *) aligned);
switch (offset)
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 340a64ba35..650ec173b6 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -51,7 +51,12 @@
# endif
#endif
+#ifndef USE_SSSE3
.text
+#else
+ .section .text.ssse3,"ax",@progbits
+#endif
+
ENTRY (BP_SYM (STRCMP))
#ifdef NOT_IN_libc
/* Simple version since we can't use SSE registers in ld.so. */
@@ -244,9 +249,13 @@ LABEL(gobble_ashr_1):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
+#ifndef USE_SSSE3
psrldq $1, %xmm3
pslldq $15, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -269,9 +278,13 @@ LABEL(gobble_ashr_1):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4 /* store for next cycle */
+#ifndef USE_SSSE3
psrldq $1, %xmm3
- pslldq $15, %xmm2
+ pslldq $15, %xmm2
por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -363,9 +376,13 @@ LABEL(gobble_ashr_2):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $2, %xmm3
pslldq $14, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -389,9 +406,13 @@ LABEL(gobble_ashr_2):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $2, %xmm3
- pslldq $14, %xmm2
- por %xmm3, %xmm2
+ pslldq $14, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -477,9 +498,13 @@ LABEL(gobble_ashr_3):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $3, %xmm3
pslldq $13, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -503,9 +528,13 @@ LABEL(gobble_ashr_3):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $3, %xmm3
- pslldq $13, %xmm2
- por %xmm3, %xmm2
+ pslldq $13, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -591,9 +620,13 @@ LABEL(gobble_ashr_4):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $4, %xmm3
pslldq $12, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -617,9 +650,13 @@ LABEL(gobble_ashr_4):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $4, %xmm3
- pslldq $12, %xmm2
- por %xmm3, %xmm2
+ pslldq $12, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -705,9 +742,13 @@ LABEL(gobble_ashr_5):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $5, %xmm3
pslldq $11, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -731,9 +772,13 @@ LABEL(gobble_ashr_5):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $5, %xmm3
- pslldq $11, %xmm2
- por %xmm3, %xmm2
+ pslldq $11, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -819,9 +864,13 @@ LABEL(gobble_ashr_6):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $6, %xmm3
pslldq $10, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -845,9 +894,13 @@ LABEL(gobble_ashr_6):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $6, %xmm3
- pslldq $10, %xmm2
- por %xmm3, %xmm2
+ pslldq $10, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -933,9 +986,13 @@ LABEL(gobble_ashr_7):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $7, %xmm3
pslldq $9, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -959,9 +1016,13 @@ LABEL(gobble_ashr_7):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $7, %xmm3
- pslldq $9, %xmm2
- por %xmm3, %xmm2
+ pslldq $9, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1047,9 +1108,13 @@ LABEL(gobble_ashr_8):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $8, %xmm3
pslldq $8, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1073,9 +1138,13 @@ LABEL(gobble_ashr_8):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $8, %xmm3
- pslldq $8, %xmm2
- por %xmm3, %xmm2
+ pslldq $8, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1161,9 +1230,13 @@ LABEL(gobble_ashr_9):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $9, %xmm3
pslldq $7, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1187,9 +1260,13 @@ LABEL(gobble_ashr_9):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $9, %xmm3
- pslldq $7, %xmm2
- por %xmm3, %xmm2
+ pslldq $7, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1275,9 +1352,13 @@ LABEL(gobble_ashr_10):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $10, %xmm3
pslldq $6, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1301,9 +1382,13 @@ LABEL(gobble_ashr_10):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $10, %xmm3
- pslldq $6, %xmm2
- por %xmm3, %xmm2
+ pslldq $6, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1389,9 +1474,13 @@ LABEL(gobble_ashr_11):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $11, %xmm3
pslldq $5, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1415,9 +1504,13 @@ LABEL(gobble_ashr_11):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $11, %xmm3
- pslldq $5, %xmm2
- por %xmm3, %xmm2
+ pslldq $5, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1503,9 +1596,13 @@ LABEL(gobble_ashr_12):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $12, %xmm3
pslldq $4, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1529,9 +1626,13 @@ LABEL(gobble_ashr_12):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $12, %xmm3
- pslldq $4, %xmm2
- por %xmm3, %xmm2
+ pslldq $4, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1617,9 +1718,13 @@ LABEL(gobble_ashr_13):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $13, %xmm3
pslldq $3, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1643,9 +1748,13 @@ LABEL(gobble_ashr_13):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $13, %xmm3
- pslldq $3, %xmm2
- por %xmm3, %xmm2
+ pslldq $3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1731,9 +1840,13 @@ LABEL(gobble_ashr_14):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $14, %xmm3
pslldq $2, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1757,9 +1870,13 @@ LABEL(gobble_ashr_14):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $14, %xmm3
- pslldq $2, %xmm2
- por %xmm3, %xmm2
+ pslldq $2, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1847,9 +1964,13 @@ LABEL(gobble_ashr_15):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $15, %xmm3
pslldq $1, %xmm2
- por %xmm3, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
@@ -1873,9 +1994,13 @@ LABEL(gobble_ashr_15):
movdqa (%rdi, %rcx), %xmm2
movdqa %xmm2, %xmm4
+#ifndef USE_SSSE3
psrldq $15, %xmm3
- pslldq $1, %xmm2
- por %xmm3, %xmm2
+ pslldq $1, %xmm2
+ por %xmm3, %xmm2 /* merge into one 16byte value */
+#else
+ palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
+#endif
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1