summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjoseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d>2010-03-13 18:20:12 +0000
committerjoseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d>2010-03-13 18:20:12 +0000
commit984124d726e087943f1415337c50a1bff32cfc17 (patch)
treec6f715c85c46ad0c97af31cff88257aca141d3ac
parent3b289e20aa9fa02a85745547a976b824bc66d096 (diff)
downloadeglibc2-984124d726e087943f1415337c50a1bff32cfc17.tar.gz
Merge changes between r9801 and r10031 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@10032 7b3dc134-2b1b-0410-93df-9e9f96275f8d
-rw-r--r--libc/ChangeLog443
-rw-r--r--libc/README21
-rw-r--r--libc/config.make.in1
-rwxr-xr-xlibc/configure2
-rw-r--r--libc/configure.in1
-rw-r--r--libc/elf/dl-dst.h21
-rw-r--r--libc/elf/dl-load.c14
-rw-r--r--libc/elf/elf.h36
-rw-r--r--libc/elf/tls-macros.h234
-rw-r--r--libc/hurd/hurd/ioctl.h2
-rw-r--r--libc/hurd/hurdioctl.c44
-rw-r--r--libc/include/fenv.h1
-rw-r--r--libc/io/ftw.c3
-rw-r--r--libc/libio/iovdprintf.c5
-rw-r--r--libc/malloc/malloc.c16
-rw-r--r--libc/manual/charset.texi10
-rw-r--r--libc/manual/errno.texi2
-rw-r--r--libc/manual/getopt.texi2
-rw-r--r--libc/manual/math.texi2
-rw-r--r--libc/manual/memory.texi2
-rw-r--r--libc/manual/message.texi2
-rw-r--r--libc/manual/resource.texi2
-rw-r--r--libc/manual/stdio.texi2
-rw-r--r--libc/manual/time.texi2
-rw-r--r--libc/math/fegetenv.c1
-rw-r--r--libc/math/math_private.h2
-rw-r--r--libc/nptl/ChangeLog25
-rw-r--r--libc/nptl/allocatestack.c22
-rw-r--r--libc/nptl/pthread_create.c36
-rw-r--r--libc/nptl/sysdeps/pthread/createthread.c25
-rw-r--r--libc/resolv/res_send.c39
-rwxr-xr-xlibc/sysdeps/i386/configure19
-rw-r--r--libc/sysdeps/i386/configure.in8
-rw-r--r--libc/sysdeps/i386/fpu/fegetenv.c1
-rw-r--r--libc/sysdeps/i386/i686/Makefile16
-rw-r--r--libc/sysdeps/i386/i686/multiarch/Makefile4
-rw-r--r--libc/sysdeps/i386/i686/multiarch/memcmp-sse4.S1004
-rw-r--r--libc/sysdeps/i386/i686/multiarch/memcmp-ssse3.S1966
-rw-r--r--libc/sysdeps/i386/i686/multiarch/memcmp.S88
-rw-r--r--libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S246
-rw-r--r--libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S113
-rw-r--r--libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S14
-rw-r--r--libc/sysdeps/i386/i686/multiarch/memset-sse2.S19
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S380
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S2220
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strcmp.S115
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strncmp-c.c8
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strncmp-sse4.S5
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strncmp-ssse3.S5
-rw-r--r--libc/sysdeps/i386/i686/multiarch/strncmp.S3
-rw-r--r--libc/sysdeps/i386/lshift.S4
-rw-r--r--libc/sysdeps/i386/rshift.S8
-rw-r--r--libc/sysdeps/ia64/fpu/fegetenv.c1
-rw-r--r--libc/sysdeps/ia64/memchr.S26
-rw-r--r--libc/sysdeps/powerpc/fpu/fegetenv.c1
-rw-r--r--libc/sysdeps/powerpc/powerpc32/configure9
-rw-r--r--libc/sysdeps/powerpc/powerpc32/configure.in4
-rw-r--r--libc/sysdeps/powerpc/powerpc32/dl-machine.h7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/dl-start.S5
-rw-r--r--libc/sysdeps/powerpc/powerpc32/elf/start.S10
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S5
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_ceil.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_floor.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_floorf.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_rint.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_rintf.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_round.S6
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S6
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_trunc.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/s_truncf.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S5
-rw-r--r--libc/sysdeps/powerpc/powerpc32/memset.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S6
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S7
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/Implies1
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/fpu/Implies1
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S89
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S1
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S88
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S1
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S92
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S1
-rw-r--r--libc/sysdeps/powerpc/powerpc32/power7/memcpy.S469
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/Implies1
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/fpu/Implies1
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S68
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S1
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S71
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S1
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S69
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S1
-rw-r--r--libc/sysdeps/powerpc/powerpc64/power7/memcpy.S449
-rw-r--r--libc/sysdeps/s390/fpu/fegetenv.c5
-rw-r--r--libc/sysdeps/s390/s390-64/utf16-utf32-z9.c11
-rw-r--r--libc/sysdeps/s390/s390-64/utf8-utf16-z9.c9
-rw-r--r--libc/sysdeps/sh/sh4/fpu/fegetenv.c1
-rw-r--r--libc/sysdeps/sparc/Makefile5
-rw-r--r--libc/sysdeps/sparc/elf/rtld-global-offsets.sym7
-rw-r--r--libc/sysdeps/sparc/fpu/fegetenv.c1
-rw-r--r--libc/sysdeps/sparc/sparc32/bcopy.c1
-rw-r--r--libc/sysdeps/sparc/sparc32/dl-irel.h55
-rw-r--r--libc/sysdeps/sparc/sparc32/dl-machine.h97
-rw-r--r--libc/sysdeps/sparc/sparc32/dl-plt.h97
-rw-r--r--libc/sysdeps/sparc/sparc32/memcpy.S356
-rw-r--r--libc/sysdeps/sparc/sparc32/memmove.c1
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S215
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/bcopy.c1
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/memmove.c1
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile4
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S4
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S4
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S2
-rw-r--r--libc/sysdeps/sparc/sparc32/sparcv9/strlen.S3
-rw-r--r--libc/sysdeps/sparc/sparc32/strlen.S128
-rw-r--r--libc/sysdeps/sparc/sparc32/udiv_qrnnd.S168
-rw-r--r--libc/sysdeps/sparc/sparc64/Implies1
-rw-r--r--libc/sysdeps/sparc/sparc64/Makefile4
-rw-r--r--libc/sysdeps/sparc/sparc64/align-cpy.S85
-rw-r--r--libc/sysdeps/sparc/sparc64/bcopy.c1
-rw-r--r--libc/sysdeps/sparc/sparc64/dl-irel.h58
-rw-r--r--libc/sysdeps/sparc/sparc64/dl-machine.h182
-rw-r--r--libc/sysdeps/sparc/sparc64/dl-plt.h163
-rw-r--r--libc/sysdeps/sparc/sparc64/memchr.S2
-rw-r--r--libc/sysdeps/sparc/sparc64/memcopy.h1
-rw-r--r--libc/sysdeps/sparc/sparc64/memcpy.S353
-rw-r--r--libc/sysdeps/sparc/sparc64/memmove.c1
-rw-r--r--libc/sysdeps/sparc/sparc64/multiarch/Makefile4
-rw-r--r--libc/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S (renamed from libc/sysdeps/sparc/sparc64/sparcv9v/memcpy.S)266
-rw-r--r--libc/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S (renamed from libc/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S)266
-rw-r--r--libc/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S320
-rw-r--r--libc/sysdeps/sparc/sparc64/multiarch/memcpy.S107
-rw-r--r--libc/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S (renamed from libc/sysdeps/sparc/sparc64/sparcv9v/memset.S)17
-rw-r--r--libc/sysdeps/sparc/sparc64/multiarch/memset.S145
-rw-r--r--libc/sysdeps/sparc/sparc64/sparcv9b/memcpy.S610
-rw-r--r--libc/sysdeps/sparc/sparc64/sparcv9v2/memset.S1
-rw-r--r--libc/sysdeps/sparc/sparc64/strlen.S210
-rw-r--r--libc/sysdeps/unix/sysv/linux/bits/in.h30
-rw-r--r--libc/sysdeps/unix/sysv/linux/internal_statvfs.c17
-rw-r--r--libc/sysdeps/unix/sysv/linux/linux_fsinfo.h61
-rw-r--r--libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S7
-rw-r--r--libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S7
-rw-r--r--libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S5
-rw-r--r--libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies1
-rw-r--r--libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S5
-rw-r--r--libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S5
-rw-r--r--libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies1
-rw-r--r--libc/sysdeps/unix/sysv/linux/s390/s390-32/utmp32.c20
-rw-r--r--libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.c18
-rw-r--r--libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.h14
-rw-r--r--libc/sysdeps/unix/sysv/linux/sparc/Versions4
-rw-r--r--libc/sysdeps/unix/sysv/linux/sparc/sparc32/makecontext.c2
-rw-r--r--libc/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c49
-rw-r--r--libc/sysdeps/unix/sysv/linux/sys/mount.h4
-rw-r--r--libc/sysdeps/x86_64/Implies1
-rw-r--r--libc/sysdeps/x86_64/dl-machine.h4
-rw-r--r--libc/sysdeps/x86_64/fpu/fegetenv.c1
-rw-r--r--ports/ChangeLog.arm5
-rw-r--r--ports/ChangeLog.hppa20
-rw-r--r--ports/ChangeLog.m68k75
-rw-r--r--ports/ChangeLog.mips4
-rw-r--r--ports/ChangeLog.powerpc4
-rw-r--r--ports/sysdeps/arm/eabi/fegetenv.c3
-rw-r--r--ports/sysdeps/arm/fpu/fegetenv.c3
-rw-r--r--ports/sysdeps/hppa/nptl/tls.h6
-rw-r--r--ports/sysdeps/m68k/dl-machine.h34
-rw-r--r--ports/sysdeps/m68k/dl-tls.h48
-rw-r--r--ports/sysdeps/m68k/fpu/fegetenv.c3
-rw-r--r--ports/sysdeps/m68k/jmpbuf-unwind.h23
-rw-r--r--ports/sysdeps/m68k/libc-tls.c38
-rw-r--r--ports/sysdeps/m68k/m680x0/fpu/bits/mathinline.h34
-rw-r--r--ports/sysdeps/m68k/m680x0/fpu/s_ccos.c4
-rw-r--r--ports/sysdeps/m68k/m680x0/fpu/s_ccosh.c4
-rw-r--r--ports/sysdeps/m68k/nptl/Makefile22
-rw-r--r--ports/sysdeps/m68k/nptl/pthread_spin_lock.c31
-rw-r--r--ports/sysdeps/m68k/nptl/pthread_spin_trylock.c28
-rw-r--r--ports/sysdeps/m68k/nptl/pthreaddef.h39
-rw-r--r--ports/sysdeps/m68k/nptl/tcb-offsets.sym11
-rw-r--r--ports/sysdeps/m68k/nptl/tls.h171
-rw-r--r--ports/sysdeps/m68k/tls-macros.h70
-rw-r--r--ports/sysdeps/mips/fpu/fegetenv.c3
-rw-r--r--ports/sysdeps/powerpc/nofpu/fegetenv.c3
-rw-r--r--ports/sysdeps/unix/sysv/linux/hppa/bits/mman.h2
-rw-r--r--ports/sysdeps/unix/sysv/linux/hppa/bits/socket.h2
-rw-r--r--ports/sysdeps/unix/sysv/linux/hppa/makecontext.c61
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/Makefile6
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/Versions14
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/bits/m68k-vdso.h61
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/bits/sigcontext.h62
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/bits/siginfo.h316
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/clone.S52
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/coldfire/m68k-helpers.S105
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/coldfire/nptl/bits/atomic.h105
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/init-first.c74
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/libc-m68k-vdso.c1
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/m680x0/m68k-helpers.S103
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/m68k-vdso.c35
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/pthreadtypes.h172
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/semaphore.h36
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/clone.S2
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/createthread.c25
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/fork.c30
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/lowlevellock.h280
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/pt-vfork.S36
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/pthread_once.c91
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/sysdep-cancel.h141
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/nptl/vfork.S38
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/register-dump.h32
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/socket.S28
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/sysdep.h56
-rw-r--r--ports/sysdeps/unix/sysv/linux/m68k/vfork.S18
223 files changed, 11941 insertions, 3834 deletions
diff --git a/libc/ChangeLog b/libc/ChangeLog
index a700332b1..e8781598a 100644
--- a/libc/ChangeLog
+++ b/libc/ChangeLog
@@ -1,3 +1,446 @@
+2010-03-12 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/sys/mount.h (UMOUNT_NOFOLLOW): Define.
+
+2010-03-12 Andreas Schwab <schwab@redhat.com>
+
+ * elf/dl-dst.h: Include "trusted-dirs.h".
+ (DL_DST_REQUIRED): Take $LIB into account.
+
+2010-03-11 Roland McGrath <roland@redhat.com>
+
+ * elf/elf.h (R_X86_64_GOT64, R_X86_64_GOTPCREL64): New macros.
+ (R_X86_64_GOTPC64, R_X86_64_GOTPLT64, R_X86_64_PLTOFF64): New macros.
+
+2010-03-10 Luis Machado <luisgpm@br.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power7/memcpy.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/memcpy.S: New file.
+
+2010-03-09 David S. Miller <davem@davemloft.net>
+
+ * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_rela): Handling
+ of R_SPARC_TLS_LE_* needs to use 32-bit loads and stores, not
+ 64-bit ones.
+
+ * sysdeps/sparc/sparc32/memcpy.S: Fix build.
+
+ * sysdeps/sparc/sparc32/strlen.S: Optimize.
+ * sysdeps/sparc/sparc64/strlen.S: Likewise.
+ * sysdeps/sparc/sparc32/sparcv9/strlen.S (ASI_PNF, ASI_BLK_P,
+ XCC): Delete definitions, not needed.
+
+2010-03-07 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/internal_statvfs.c (__statvfs_getflags):
+ Little follow-up patch from last change.
+
+2010-03-06 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/internal_statvfs.c: Handle ext4 and logfs.
+ * sysdeps/unix/sysv/linux/linux_fsinfo.h: Add entry for logfs.
+
+2010-02-18 Yann Droneaud <yann@droneaud.fr>
+
+ * resolv/res_send.c: Fixed DEBUG statements.
+ Moved tmpbuf declaration to block beginning and
+ updated pointer names used in multiple DprintQ() calls
+
+2010-02-18 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config.make.in (config-asflags-i686): Define.
+ * configure.in: Substitute libc_cv_as_i686.
+ * sysdeps/i386/configure.in: Check if assembler supports -mtune=i686.
+ * sysdeps/i386/i686/Makefile (CFLAGS-.oX): Add -Wa,-mtune=i686
+ if assembler supports -mtune=i686.
+ (ASFLAGS-.oX): Likewise.
+
+2010-03-04 Ulrich Drepper <drepper@redhat.com>
+
+ * elf/elf.h (R_X86_64_SIZE32): Define.
+ (R_X86_64_SIZE64): Define.
+
+2010-03-02 Richard Guenther <rguenther@suse.de>
+
+ * sysdeps/x86_64/dl-machine.h (elf_machine_rela): R_X86_64_PC32
+ is sign-extending.
+
+2010-03-03 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/unix/sysv/linux/bits/in.h: Add a few more definitions from
+ the kernel headers.
+
+2010-03-04 David S. Miller <davem@davemloft.net>
+
+ * sysdeps/sparc/sparc64/Implies: Add ieee754/dbl-64/wordsize-64 entry.
+
+ * sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S: Delete.
+ * sysdeps/sparc/sparc32/udiv_qrnnd.S: Delete.
+
+2010-03-03 David S. Miller <davem@davemloft.net>
+
+ * math/math_private.h (INSERT_WORDS64): Fix argument order.
+
+2010-03-03 Aurelien Jarno <aurelien@aurel32.net>
+
+ * sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c: New file.
+
+2010-03-03 David S. Miller <davem@davemloft.net>
+
+ * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must
+ pass '1' for 't' argument to sparc_fixup_plt.
+ * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_lazy_rel):
+ Likewise.
+ * sysdeps/sparc/sparc32/dl-plt.h (OPCODE_BA_PT): Define.
+ (sparc_fixup_plt): Document 't' argument. Enable branch
+ optimization and use v9 branches when possible. Explain why we
+ cannot unconditionally patch the branch into the first PLT
+ instruction.
+ * sysdeps/sparc/sparc64/dl-plt.h (sparc64_fixup_plt): Document 't'
+ argument. Use v9 branches when possible. Explain why we can in
+ fact unconditionally use a branch in the first PLT instruction
+ here.
+
+2010-02-28 Roland McGrath <roland@redhat.com>
+
+ * elf/elf.h (NT_X86_XSTATE): New macro.
+
+2010-02-25 David S. Miller <davem@davemloft.net>
+
+ * sysdeps/sparc/sparc64/Makefile: Add align-cpy rule.
+ * sysdeps/sparc/sparc64/align-cpy.S: New.
+ * sysdeps/sparc/sparc64/memcpy.S (__align_cpy_1, __align_cpy_2,
+ __align_cpy_4, __align_cpy_8, __align_cpy_16): Remove.
+ * sysdeps/sparc/sparc64/sparcv9b/memcpy.S (__align_cpy_1,
+ __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16):
+ Remove.
+ * sysdeps/sparc/sparc64/sparcv9v/memcpy.S (__align_cpy_1,
+ __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16):
+ Remove.
+ * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S (__align_cpy_1,
+ __align_cpy_2, __align_cpy_4, __align_cpy_8, __align_cpy_16):
+ Remove.
+
+ * sysdeps/sparc/sparc32/bcopy.c: Delete.
+ * sysdeps/sparc/sparc32/memmove.c: Delete.
+ * sysdeps/sparc/sparc32/sparcv9/bcopy.c: Delete.
+ * sysdeps/sparc/sparc32/sparcv9/memmove.c: Delete.
+ * sysdeps/sparc/sparc64/bcopy.c: Delete.
+ * sysdeps/sparc/sparc64/memmove.c: Delete.
+ * sysdeps/sparc/sparc64/memcopy.h: New.
+ * sysdeps/sparc/sparc32/memcpy.S (bcopy, memmove): Remove.
+ * sysdeps/sparc/sparc64/memcpy.S (bcopy, memmove): Likewise.
+ * sysdeps/sparc/sparc64/sparcv9b/memcpy.S (bcopy, memmove): Likewise.
+ * sysdeps/sparc/sparc64/sparcv9v/memcpy.S (bcopy, memmove): Likewise.
+ * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S (bcopy, memmove): Likewise.
+
+ * sysdeps/sparc/elf/rtld-global-offsets.sym: New file.
+ * sysdeps/sparc/Makefile (csu): Add rtld-global-offsets.sym to
+ gen-as-const-headers.
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: New file.
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S: New file.
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S: New file.
+ * sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S: Move to...
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S: ...here.
+ * sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S: Move to...
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S: ...here.
+ * sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S: Move to...
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S: ...here.
+ * sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S: Move to...
+ * sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S: ...here.
+ * sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S: Removed.
+ * sysdeps/sparc/sparc64/multiarch/Makefile: New file.
+ * sysdeps/sparc/sparc64/sparcv9v/memcpy.S: Move to...
+ * sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: ...here.
+ * sysdeps/sparc/sparc64/sparcv9v2/memcpy.S: Move to...
+ * sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: ...here.
+ * sysdeps/sparc/sparc64/sparcv9b/memcpy.S: Move to...
+ * sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: ...here.
+ * sysdeps/sparc/sparc64/sparcv9v/memset.S: Move to...
+ * sysdeps/sparc/sparc64/multiarch/memset-niagara1.S: ...here.
+ * sysdeps/sparc/sparc64/sparcv9v2/memset.S: Removed.
+ * sysdeps/sparc/sparc64/multiarch/memcpy.S: New file.
+ * sysdeps/sparc/sparc64/multiarch/memset.S: New file.
+
+2010-02-20 H.J. Lu <hongjiu.lu@intel.com>
+
+ [BZ #11332]
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Use cfi_remember_state
+ and cfi_restore_state only if USE_AS_STRNCMP is defined.
+
+2010-02-24 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/Implies: Add ieee754/dbl-64/wordsize-64 entry.
+
+2010-02-24 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
+ (bk_write_less32bytes_2): Renamed to ...
+ (bk_write_less48bytes): This.
+ Use unsigned conditional jumps.
+ Correct unwind info.
+ Use add/sub instead of lea if possible.
+ (shl_0_gobble_cache_loop_tail): Removed.
+ (large_page): Properly adjust ECX.
+
+ * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: Use unsigned
+ conditional jumps.
+ Correct unwind info.
+
+ * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Remove redundant
+ punpcklbw.
+ Use unsigned conditional jumps.
+ (128bytesormore_nt): Renamed to ...
+ (128bytesormore_endof_L1): This.
+ Use add instead of lea if possible.
+ Correct unwind info.
+ * sysdeps/i386/i686/multiarch/memset-sse2.S: Remove redundant
+ punpcklbw.
+ Use unsigned conditional jumps.
+ Use add instead of lea if possible.
+ Correct unwind info.
+
+2010-02-24 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #11319]
+ * libio/iovdprintf.c (_IO_vdprintf): Explicitly flush stream before
+ undoing the stream because _IO_FINISH doesn't report failures.
+
+ [BZ #5553]
+ * malloc/malloc.c (public_vALLOc): Set ar_ptr when trying main_arena.
+ (public_pVALLOc): Likewise.
+ Patch by Petr Baudis.
+
+2010-02-22 Jim Meyering <meyering@redhat.com>
+
+ * manual/math.texi (BSD Random): Fix a typo: s/are/is/
+
+ * manual/charset.texi: Adjust grammar.
+
+ * manual/errno.texi (Error Messages): Fix doubled-words and typos.
+ * manual/charset.texi (Selecting the Conversion): Likewise.
+ * manual/getopt.texi (Getopt Long Options): Likewise.
+ * manual/memory.texi (Resizing the Data Segment): Likewise.
+ * manual/message.texi (GUI program problems): Likewise.
+ * manual/resource.texi (CPU Affinity): Likewise.
+ * manual/stdio.texi (Streams and Threads): Likewise.
+ * manual/time.texi (High Accuracy Clock): Likewise.
+
+2010-02-20  David S. Miller  <davem@davemloft.net>
+
+ * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_runtime_setup):
+ Adjust rela->r_offset by l->l_addr when rewriting PLT.
+
+ * sysdeps/sparc/sparc64/memchr.S: Use unsigned branch on greater to
+ test end of buffer.
+
+ * sysdeps/unix/sysv/linux/sparc/sparc32/makecontext.c
+ (__makecontext): Adjust arg counter properly when copying arg
+ stack slots.
+
+2010-02-20  Joseph Myers  <joseph@codesourcery.com>
+
+ * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_runtime_setup):
+ Adjust rela->r_offset by l->l_addr when rewriting PLT.
+
+2010-02-19 Carl Fredrik Hammar <hammy.lite@gmail.com>
+
+ * hurd/hurdioctl.c (tiocsctty): Call `do_tiocsctty' instead of
+ non-existent `tiocsctty_port'.
+
+2010-02-16 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/memcmp.S (memcmp): Use CPUID_OFFSET
+ instead of FEATURE_OFFSET.
+ * sysdeps/i386/i686/multiarch/strcmp.S (strcmp): Likewise.
+
+	* sysdeps/i386/i686/multiarch/memcmp-sse4.S: Add alignments.
+ Fix one unwind info problem.
+
+ * sysdeps/i386/i686/multiarch/memcmp-ssse3.S (less1bytes): Add CFI_POP.
+
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Simplify unwind info.
+
+2010-02-17 H.J. Lu <hongjiu.lu@intel.com>
+ Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Fix typo in unwind info.
+ Clean up a bit.
+
+2010-02-17 Carl Fredrik Hammar <hammy.lite@gmail.com>
+
+ * hurd/hurdioctl.c (tiocsctty): Only get FD ports, do work in...
+ (tiocsctty_port): ...this new function.
+
+ * hurd/hurd/ioctl.h (_HURD_HANDLE_IOCTLS_1): Cast to
+ `ioctl_handler_t'.
+
+2010-02-15 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/i386/i686/multiarch/memcmp-sse4.S: Fix unwind info.
+ * sysdeps/i386/i686/multiarch/memcmp-ssse3.S: Likewise.
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Likewise.
+ * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Likewise.
+
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: Don't fall through to
+ undefined code.
+
+2010-02-12 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+ strcmp-ssse3, strcmp-sse4, strncmp-c, strncmp-ssse3, strncmp-sse4,
+ memcmp-c, memcmp-ssse3, and memcmp-sse4.
+ * sysdeps/i386/i686/multiarch/memcmp-sse4.S: New file.
+ * sysdeps/i386/i686/multiarch/memcmp-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/memcmp.S: New file.
+ * sysdeps/i386/i686/multiarch/strcmp-sse4.S: New file.
+ * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/strcmp.S: New file.
+ * sysdeps/i386/i686/multiarch/strncmp-c.c: New file.
+ * sysdeps/i386/i686/multiarch/strncmp-sse4.S: New file.
+ * sysdeps/i386/i686/multiarch/strncmp-ssse3.S: New file.
+ * sysdeps/i386/i686/multiarch/strncmp.S: New file.
+
+2010-02-12 Luis Machado <luisgpm@br.ibm.com>
+
+ * sysdeps/powerpc/powerpc32/dl-machine.h: Removed old PPC_REL16 check.
+ * sysdeps/powerpc/powerpc32/dl-machine.h: Likewise.
+ * sysdeps/powerpc/powerpc32/elf/start.S: Likewise.
+ * sysdeps/powerpc/powerpc32/memset.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/configure.in: Fail if R_PPC_REL16
+ is not supported.
+ * sysdeps/powerpc/powerpc32/fpu/s_round.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_truncf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_floorf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_ceilf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_ceil.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_floor.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_roundf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_rintf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_trunc.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_lround.S: Likewise.
+ * sysdeps/powerpc/powerpc32/fpu/s_rint.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S: Likewise.
+ * sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S: Likewise.
+ * sysdeps/powerpc/powerpc32/dl-start.S: Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S:
+ Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S: Likewise.
+
+2010-02-12 Alan Modra <amodra@gmail.com>
+
+ * elf/tls-macros.h [__powerpc__] (__TLS_CALL_CLOBBERS): Remove r3.
+ Define and use for __powerpc64__ too.
+ [__powerpc__] (TLS_LD): Add r3 to clobbers.
+ [__powerpc__] (TLS_GD): Set asm output. Make __result r3 reg.
+ [__powerpc64__] (TLS_GD): Make __result r3 reg.
+ [__powerpc64__] (TLS_IE): Relax output constraint.
+
+2010-02-11 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+
+ * sysdeps/s390/s390-64/utf8-utf16-z9.c: Disable hardware
+ instructions cu21 and cu24. Add well-formedness checking
+ parameter and adjust the software implementation.
+ * sysdeps/s390/s390-64/utf16-utf32-z9.c: Likewise.
+
+2010-02-10 Ulrich Drepper <drepper@redhat.com>
+
+ [BZ #11271]
+ * io/ftw.c (ftw_startup): Close descriptor for initial directory
+ after changing back to it.
+
+2010-02-05 David S. Miller <davem@davemloft.net>
+
+ * elf/elf.h (R_SPARC_JMP_IREL, R_SPARC_IRELATIVE): Define.
+ * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_rela): Handle new
+ ifunc relocs.
+ (elf_machine_lazy_rel): Likewise.
+ (sparc_fixup_plt): Pull out to...
+ * sysdeps/sparc/sparc32/dl-plt.h: ...here.
+ * sysdeps/sparc/sparc32/dl-irel.h: New file.
+ * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_rela): Handle new
+ ifunc relocs.
+ (elf_machine_lazy_rel): Likewise.
+ (sparc64_fixup_plt): Pull out to...
+ * sysdeps/sparc/sparc64/dl-plt.h: ...here.
+ * sysdeps/sparc/sparc64/dl-irel.h: New file.
+
+2010-02-09 Maxim Kuvyrkov <maxim@codesourcery.com>
+
+ * elf/elf.h: Define m68k TLS relocations.
+
+2010-02-10 Luis Machado <luisgpm@br.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power7/Implies: Removed.
+ * sysdeps/powerpc/powerpc64/power7/fpu/Implies: Removed.
+ * sysdeps/powerpc/powerpc32/power7/Implies: Removed.
+ * sysdeps/powerpc/powerpc32/power7/fpu/Implies: Removed.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies: Add
+ 64-bit power7 fpu path.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies: Add
+ 32-bit power7 fpu math.
+
+2010-02-09 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/i386/rshift.S: More compact unwind information.
+
+ * sysdeps/i386/lshift.S: Fix unwind information.
+
+2010-02-08 Luis Machado <luisgpm@br.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S: New file.
+ * sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S: New file.
+ * sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S: New file.
+
+2010-02-08 Andreas Schwab <schwab@redhat.com>
+
+ * include/fenv.h: Add hidden proto for fegetenv.
+ * math/fegetenv.c: Add hidden alias.
+ * sysdeps/i386/fpu/fegetenv.c: Likewise.
+ * sysdeps/ia64/fpu/fegetenv.c: Likewise.
+ * sysdeps/powerpc/fpu/fegetenv.c: Likewise.
+ * sysdeps/sh/sh4/fpu/fegetenv.c: Likewise.
+ * sysdeps/sparc/fpu/fegetenv.c: Likewise.
+	* sysdeps/x86_64/fpu/fegetenv.c: Likewise.
+ * sysdeps/s390/fpu/fegetenv.c: Likewise. Remove unused headers.
+
+2010-02-05 H.J. Lu <hongjiu.lu@intel.com>
+
+ [BZ #11230]
+ * sysdeps/ia64/memchr.S: Don't read beyond the last byte
+ during recovery.
+
+2010-01-26 Andreas Schwab <schwab@redhat.com>
+
+ * sysdeps/unix/sysv/linux/s390/s390-32/utmp32.c: Use internal
+ interface.
+ * sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.c: Likewise.
+ * sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.h: Declare internal
+ utmpx interface.
+
+ * sysdeps/unix/sysv/linux/sparc/Versions: Add new errlist compat
+ entry for 2.12.
+
2010-02-03 Ulrich Drepper <drepper@redhat.com>
[BZ #11235]
diff --git a/libc/README b/libc/README
index f661c3a8f..0ba128879 100644
--- a/libc/README
+++ b/libc/README
@@ -42,20 +42,23 @@ In GNU/Hurd systems, it works with a microkernel and Hurd servers.
The GNU C Library implements much of the POSIX.1 functionality in the
GNU/Hurd system, using configurations i[34567]86-*-gnu.
-When working with Linux kernels, the GNU C Library version 2.4 is
-intended primarily for use with Linux kernel version 2.6.0 and later.
-We only support using the NPTL implementation of pthreads, which is now
-the default configuration. Most of the C library will continue to work
-on older Linux kernels and many programs will not require a 2.6 kernel
-to run correctly. However, pthreads and related functionality will not
-work at all on old kernels and we do not recommend using glibc 2.4 with
-any Linux kernel prior to 2.6.
+When working with Linux kernels, the GNU C Library version from
+version 2.4 on is intended primarily for use with Linux kernel version
+2.6.0 and later. We only support using the NPTL implementation of
+pthreads, which is now the default configuration. Most of the C
+library will continue to work on older Linux kernels and many programs
+will not require a 2.6 kernel to run correctly. However, pthreads and
+related functionality will not work at all on old kernels and we do
+not recommend using glibc 2.4 with any Linux kernel prior to 2.6.
All Linux kernel versions prior to 2.6.16 are known to have some bugs that
may cause some of the tests related to pthreads in "make check" to fail.
If you see such problems, please try the test suite on the most recent
Linux kernel version that you can use, before pursuing those bugs further.
+Also note that the shared version of the libgcc_s library must be
+installed for the pthread library to work correctly.
+
The old LinuxThreads add-on implementation of pthreads for older Linux
kernels is no longer supported, and we are not distributing it with this
release. Someone has volunteered to revive its maintenance unofficially
@@ -76,7 +79,6 @@ The GNU C Library supports these configurations for using Linux kernels:
sparc*-*-linux-gnu
sparc64*-*-linux-gnu
- alpha*-*-linux-gnu Requires Linux 2.6.9 for NPTL
sh[34]-*-linux-gnu Requires Linux 2.6.11
The code for other CPU configurations supported by volunteers outside of
@@ -85,6 +87,7 @@ add-on. You can find glibc-ports-VERSION distributed separately in the
same place where you got the main glibc distribution files.
Currently these configurations are known to work using the `ports' add-on:
+ alpha*-*-linux-gnu Requires Linux 2.6.9 for NPTL
arm-*-linux-gnu Requires Linux 2.6.15 for NPTL, no SMP support
arm-*-linux-gnueabi Requires Linux 2.6.16-rc1 for NPTL, no SMP
mips-*-linux-gnu Requires Linux 2.6.12 for NPTL
diff --git a/libc/config.make.in b/libc/config.make.in
index fda33bf39..a62c37885 100644
--- a/libc/config.make.in
+++ b/libc/config.make.in
@@ -36,6 +36,7 @@ asflags-cpu = @libc_cv_cc_submachine@
config-cflags-sse4 = @libc_cv_cc_sse4@
config-cflags-avx = @libc_cv_cc_avx@
+config-asflags-i686 = @libc_cv_as_i686@
defines = @DEFINES@
sysincludes = @SYSINCLUDES@
diff --git a/libc/configure b/libc/configure
index b620db88b..6b44eb2fc 100755
--- a/libc/configure
+++ b/libc/configure
@@ -657,6 +657,7 @@ xcoff
elf
ldd_rewrite_script
use_ldconfig
+libc_cv_as_i686
libc_cv_cc_avx
libc_cv_cc_sse4
libc_cv_cpp_asm_debuginfo
@@ -8899,6 +8900,7 @@ fi
+
if test $elf = yes; then
cat >>confdefs.h <<\_ACEOF
#define HAVE_ELF 1
diff --git a/libc/configure.in b/libc/configure.in
index 82a974e62..16f1d38b6 100644
--- a/libc/configure.in
+++ b/libc/configure.in
@@ -2342,6 +2342,7 @@ dnl sysdeps/CPU/configure.in checks set this via arch-specific asm tests
AC_SUBST(libc_cv_cpp_asm_debuginfo)
AC_SUBST(libc_cv_cc_sse4)
AC_SUBST(libc_cv_cc_avx)
+AC_SUBST(libc_cv_as_i686)
AC_SUBST(use_ldconfig)
AC_SUBST(ldd_rewrite_script)
diff --git a/libc/elf/dl-dst.h b/libc/elf/dl-dst.h
index 76076a603..ae8d119c0 100644
--- a/libc/elf/dl-dst.h
+++ b/libc/elf/dl-dst.h
@@ -18,6 +18,8 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include "trusted-dirs.h"
+
/* Determine the number of DST elements in the name. Only if IS_PATH is
nonzero paths are recognized (i.e., multiple, ':' separated filenames). */
#define DL_DST_COUNT(name, is_path) \
@@ -39,12 +41,13 @@
\
if (__cnt > 0) \
{ \
- size_t origin_len; \
+ size_t dst_len; \
/* Now we make a guess how many extra characters on top of the \
length of S we need to represent the result. We know that \
we have CNT replacements. Each at most can use \
- MAX (strlen (ORIGIN), strlen (_dl_platform)) \
- minus 7 (which is the length of "$ORIGIN"). \
+ MAX (MAX (strlen (ORIGIN), strlen (_dl_platform)), \
+ strlen (DL_DST_LIB)) \
+ minus 4 (which is the length of "$LIB"). \
\
First get the origin string if it is not available yet. \
This can only happen for the map of the executable. */ \
@@ -53,14 +56,16 @@
{ \
assert ((l)->l_name[0] == '\0'); \
(l)->l_origin = _dl_get_origin (); \
- origin_len = ((l)->l_origin && (l)->l_origin != (char *) -1 \
+ dst_len = ((l)->l_origin && (l)->l_origin != (char *) -1 \
? strlen ((l)->l_origin) : 0); \
} \
else \
- origin_len = (l)->l_origin == (char *) -1 \
+ dst_len = (l)->l_origin == (char *) -1 \
? 0 : strlen ((l)->l_origin); \
- \
- __len += __cnt * (MAX (origin_len, GLRO(dl_platformlen)) - 7); \
+ dst_len = MAX (MAX (dst_len, GLRO(dl_platformlen)), \
+ strlen (DL_DST_LIB)); \
+ if (dst_len > 4) \
+ __len += __cnt * (dst_len - 4); \
} \
\
__len; })
@@ -72,7 +77,7 @@
if ((l) == NULL) \
{ \
const char *origin = _dl_get_origin (); \
- origin_len = (origin && origin != (char *) -1 ? strlen (origin) : 0); \
+ dst_len = (origin && origin != (char *) -1 ? strlen (origin) : 0); \
} \
else
#endif
diff --git a/libc/elf/dl-load.c b/libc/elf/dl-load.c
index 0fa3dca56..cd02c8df7 100644
--- a/libc/elf/dl-load.c
+++ b/libc/elf/dl-load.c
@@ -1,5 +1,5 @@
/* Map in a shared object's segments from the file.
- Copyright (C) 1995-2005, 2006, 2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1995-2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -313,7 +313,7 @@ static char *
expand_dynamic_string_token (struct link_map *l, const char *s)
{
/* We make two runs over the string. First we determine how large the
- resulting string is and then we copy it over. Since this is now
+ resulting string is and then we copy it over. Since this is no
frequently executed operation we are looking here not for performance
but rather for code size. */
size_t cnt;
@@ -391,7 +391,7 @@ fillin_rpath (char *rpath, struct r_search_path_elem **result, const char *sep,
size_t len = strlen (cp);
/* `strsep' can pass an empty string. This has to be
- interpreted as `use the current directory'. */
+ interpreted as `use the current directory'. */
if (len == 0)
{
static const char curwd[] = "./";
@@ -1519,7 +1519,7 @@ cannot enable executable stack as shared object requires");
/* Print search path. */
static void
print_search_path (struct r_search_path_elem **list,
- const char *what, const char *name)
+ const char *what, const char *name)
{
char buf[max_dirnamelen + max_capstrlen];
int first = 1;
@@ -2044,7 +2044,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
fd = -1;
/* When the object has the RUNPATH information we don't use any
- RPATHs. */
+ RPATHs. */
if (loader == NULL || loader->l_info[DT_RUNPATH] == NULL)
{
/* This is the executable's map (if there is one). Make sure that
@@ -2067,7 +2067,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
}
/* If dynamically linked, try the DT_RPATH of the executable
- itself. NB: we do this for lookups in any namespace. */
+ itself. NB: we do this for lookups in any namespace. */
if (fd == -1 && !did_main_map
&& main_map != NULL && main_map->l_type != lt_loaded
&& cache_rpath (main_map, &main_map->l_rpath_dirs, DT_RPATH,
@@ -2164,7 +2164,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
/* Add another newline when we are tracing the library loading. */
if (__builtin_expect (GLRO_dl_debug_mask & DL_DEBUG_LIBS, 0))
- _dl_debug_printf ("\n");
+ _dl_debug_printf ("\n");
}
else
{
diff --git a/libc/elf/elf.h b/libc/elf/elf.h
index 8af7c177c..f41a02649 100644
--- a/libc/elf/elf.h
+++ b/libc/elf/elf.h
@@ -619,6 +619,7 @@ typedef struct
#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
/* Legal values for the note segment descriptor types for object files. */
@@ -1123,8 +1124,29 @@ typedef struct
#define R_68K_GLOB_DAT 20 /* Create GOT entry */
#define R_68K_JMP_SLOT 21 /* Create PLT entry */
#define R_68K_RELATIVE 22 /* Adjust by program base */
+#define R_68K_TLS_GD32 25 /* 32 bit GOT offset for GD */
+#define R_68K_TLS_GD16 26 /* 16 bit GOT offset for GD */
+#define R_68K_TLS_GD8 27 /* 8 bit GOT offset for GD */
+#define R_68K_TLS_LDM32 28 /* 32 bit GOT offset for LDM */
+#define R_68K_TLS_LDM16 29 /* 16 bit GOT offset for LDM */
+#define R_68K_TLS_LDM8 30 /* 8 bit GOT offset for LDM */
+#define R_68K_TLS_LDO32 31 /* 32 bit module-relative offset */
+#define R_68K_TLS_LDO16 32 /* 16 bit module-relative offset */
+#define R_68K_TLS_LDO8 33 /* 8 bit module-relative offset */
+#define R_68K_TLS_IE32 34 /* 32 bit GOT offset for IE */
+#define R_68K_TLS_IE16 35 /* 16 bit GOT offset for IE */
+#define R_68K_TLS_IE8 36 /* 8 bit GOT offset for IE */
+#define R_68K_TLS_LE32 37 /* 32 bit offset relative to
+ static TLS block */
+#define R_68K_TLS_LE16 38 /* 16 bit offset relative to
+ static TLS block */
+#define R_68K_TLS_LE8 39 /* 8 bit offset relative to
+ static TLS block */
+#define R_68K_TLS_DTPMOD32 40 /* 32 bit module number */
+#define R_68K_TLS_DTPREL32 41 /* 32 bit module-relative offset */
+#define R_68K_TLS_TPREL32 42 /* 32 bit TP-relative offset */
/* Keep this the last entry. */
-#define R_68K_NUM 23
+#define R_68K_NUM 43
/* Intel 80386 specific definitions. */
@@ -1303,6 +1325,8 @@ typedef struct
#define R_SPARC_H34 85
#define R_SPARC_SIZE32 86
#define R_SPARC_SIZE64 87
+#define R_SPARC_JMP_IREL 248
+#define R_SPARC_IRELATIVE 249
#define R_SPARC_GNU_VTINHERIT 250
#define R_SPARC_GNU_VTENTRY 251
#define R_SPARC_REV32 252
@@ -2642,7 +2666,15 @@ typedef Elf32_Addr Elf32_Conflict;
#define R_X86_64_GOTOFF64 25 /* 64 bit offset to GOT */
#define R_X86_64_GOTPC32 26 /* 32 bit signed pc relative
offset to GOT */
-/* 27 .. 33 */
+#define R_X86_64_GOT64 27 /* 64-bit GOT entry offset */
+#define R_X86_64_GOTPCREL64 28 /* 64-bit PC relative offset
+ to GOT entry */
+#define R_X86_64_GOTPC64 29 /* 64-bit PC relative offset to GOT */
+#define R_X86_64_GOTPLT64 30 /* like GOT64, says PLT entry needed */
+#define R_X86_64_PLTOFF64 31 /* 64-bit GOT relative offset
+ to PLT entry */
+#define R_X86_64_SIZE32 32 /* Size of symbol plus 32-bit addend */
+#define R_X86_64_SIZE64 33 /* Size of symbol plus 64-bit addend */
#define R_X86_64_GOTPC32_TLSDESC 34 /* GOT offset for TLS descriptor. */
#define R_X86_64_TLSDESC_CALL 35 /* Marker for call through TLS
descriptor. */
diff --git a/libc/elf/tls-macros.h b/libc/elf/tls-macros.h
index 6463a6c3f..781256db1 100644
--- a/libc/elf/tls-macros.h
+++ b/libc/elf/tls-macros.h
@@ -701,154 +701,146 @@ register void *__gp __asm__("$29");
(int *) (__builtin_thread_pointer() + __offset); })
# endif
-#elif defined __powerpc__ && !defined __powerpc64__
+#elif defined __powerpc__
-#include "config.h"
-
-# define __TLS_CALL_CLOBBERS \
- "0", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", \
+# define __TLS_CALL_CLOBBERS \
+ "0", "4", "5", "6", "7", "8", "9", "10", "11", "12", \
"lr", "ctr", "cr0", "cr1", "cr5", "cr6", "cr7"
+# ifndef __powerpc64__
+
+# include "config.h"
+
/* PowerPC32 Local Exec TLS access. */
-# define TLS_LE(x) \
- ({ int *__result; \
- asm ("addi %0,2," #x "@tprel" \
- : "=r" (__result)); \
+# define TLS_LE(x) \
+ ({ int *__result; \
+ asm ("addi %0,2," #x "@tprel" \
+ : "=r" (__result)); \
__result; })
/* PowerPC32 Initial Exec TLS access. */
-# ifdef HAVE_ASM_PPC_REL16
-# define TLS_IE(x) \
- ({ int *__result; \
- asm ("bcl 20,31,1f\n1:\t" \
- "mflr %0\n\t" \
- "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
- "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
- "lwz %0," #x "@got@tprel(%0)\n\t" \
- "add %0,%0," #x "@tls" \
- : "=b" (__result) : \
- : "lr"); \
+# ifdef HAVE_ASM_PPC_REL16
+# define TLS_IE(x) \
+ ({ int *__result; \
+ asm ("bcl 20,31,1f\n1:\t" \
+ "mflr %0\n\t" \
+ "addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
+ "addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
+ "lwz %0," #x "@got@tprel(%0)\n\t" \
+ "add %0,%0," #x "@tls" \
+ : "=b" (__result) : \
+ : "lr"); \
__result; })
-# else
-# define TLS_IE(x) \
- ({ int *__result; \
- asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
- "mflr %0\n\t" \
- "lwz %0," #x "@got@tprel(%0)\n\t" \
- "add %0,%0," #x "@tls" \
- : "=b" (__result) : \
- : "lr"); \
+# else
+# define TLS_IE(x) \
+ ({ int *__result; \
+ asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
+ "mflr %0\n\t" \
+ "lwz %0," #x "@got@tprel(%0)\n\t" \
+ "add %0,%0," #x "@tls" \
+ : "=b" (__result) : \
+ : "lr"); \
__result; })
-# endif
+# endif
/* PowerPC32 Local Dynamic TLS access. */
-# ifdef HAVE_ASM_PPC_REL16
-# define TLS_LD(x) \
- ({ int *__result; \
- asm ("bcl 20,31,1f\n1:\t" \
- "mflr 3\n\t" \
- "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
- "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
- "addi 3,3," #x "@got@tlsld\n\t" \
- "bl __tls_get_addr@plt\n\t" \
- "addi %0,3," #x "@dtprel" \
- : "=r" (__result) : \
- : __TLS_CALL_CLOBBERS); \
+# ifdef HAVE_ASM_PPC_REL16
+# define TLS_LD(x) \
+ ({ int *__result; \
+ asm ("bcl 20,31,1f\n1:\t" \
+ "mflr 3\n\t" \
+ "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
+ "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
+ "addi 3,3," #x "@got@tlsld\n\t" \
+ "bl __tls_get_addr@plt\n\t" \
+ "addi %0,3," #x "@dtprel" \
+ : "=r" (__result) : \
+ : "3", __TLS_CALL_CLOBBERS); \
__result; })
-# else
-# define TLS_LD(x) \
- ({ int *__result; \
- asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
- "mflr 3\n\t" \
- "addi 3,3," #x "@got@tlsld\n\t" \
- "bl __tls_get_addr@plt\n\t" \
- "addi %0,3," #x "@dtprel" \
- : "=r" (__result) : \
- : __TLS_CALL_CLOBBERS); \
+# else
+# define TLS_LD(x) \
+ ({ int *__result; \
+ asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
+ "mflr 3\n\t" \
+ "addi 3,3," #x "@got@tlsld\n\t" \
+ "bl __tls_get_addr@plt\n\t" \
+ "addi %0,3," #x "@dtprel" \
+ : "=r" (__result) : \
+ : "3", __TLS_CALL_CLOBBERS); \
__result; })
-# endif
+# endif
/* PowerPC32 General Dynamic TLS access. */
-# ifdef HAVE_ASM_PPC_REL16
-# define TLS_GD(x) \
- ({ register int *__result __asm__ ("r3"); \
- asm ("bcl 20,31,1f\n1:\t" \
- "mflr 3\n\t" \
- "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
- "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
- "addi 3,3," #x "@got@tlsgd\n\t" \
- "bl __tls_get_addr@plt" \
- : : \
- : __TLS_CALL_CLOBBERS); \
+# ifdef HAVE_ASM_PPC_REL16
+# define TLS_GD(x) \
+ ({ register int *__result __asm__ ("r3"); \
+ asm ("bcl 20,31,1f\n1:\t" \
+ "mflr 3\n\t" \
+ "addis 3,3,_GLOBAL_OFFSET_TABLE_-1b@ha\n\t" \
+ "addi 3,3,_GLOBAL_OFFSET_TABLE_-1b@l\n\t" \
+ "addi 3,3," #x "@got@tlsgd\n\t" \
+ "bl __tls_get_addr@plt" \
+ : "=r" (__result) : \
+ : __TLS_CALL_CLOBBERS); \
__result; })
-# else
-# define TLS_GD(x) \
- ({ register int *__result __asm__ ("r3"); \
- asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
- "mflr 3\n\t" \
- "addi 3,3," #x "@got@tlsgd\n\t" \
- "bl __tls_get_addr@plt" \
- : : \
- : __TLS_CALL_CLOBBERS); \
+# else
+# define TLS_GD(x) \
+ ({ register int *__result __asm__ ("r3"); \
+ asm ("bl _GLOBAL_OFFSET_TABLE_@local-4\n\t" \
+ "mflr 3\n\t" \
+ "addi 3,3," #x "@got@tlsgd\n\t" \
+ "bl __tls_get_addr@plt" \
+ : "=r" (__result) : \
+ : __TLS_CALL_CLOBBERS); \
__result; })
-# endif
+# endif
-#elif defined __powerpc__ && defined __powerpc64__
+# else
/* PowerPC64 Local Exec TLS access. */
-# define TLS_LE(x) \
- ({ int * __result; \
- asm ( \
- " addis %0,13," #x "@tprel@ha\n" \
- " addi %0,%0," #x "@tprel@l\n" \
- : "=b" (__result) ); \
- __result; \
+# define TLS_LE(x) \
+ ({ int * __result; \
+ asm ("addis %0,13," #x "@tprel@ha\n\t" \
+ "addi %0,%0," #x "@tprel@l" \
+ : "=b" (__result) ); \
+ __result; \
})
/* PowerPC64 Initial Exec TLS access. */
-# define TLS_IE(x) \
- ({ int * __result; \
- asm ( \
- " ld %0," #x "@got@tprel(2)\n" \
- " add %0,%0," #x "@tls\n" \
- : "=b" (__result) ); \
- __result; \
+# define TLS_IE(x) \
+ ({ int * __result; \
+ asm ("ld %0," #x "@got@tprel(2)\n\t" \
+ "add %0,%0," #x "@tls" \
+ : "=r" (__result) ); \
+ __result; \
})
-# ifdef HAVE_ASM_GLOBAL_DOT_NAME
-# define __TLS_GET_ADDR ".__tls_get_addr"
-# else
-# define __TLS_GET_ADDR "__tls_get_addr"
-# endif
+# ifdef HAVE_ASM_GLOBAL_DOT_NAME
+# define __TLS_GET_ADDR ".__tls_get_addr"
+# else
+# define __TLS_GET_ADDR "__tls_get_addr"
+# endif
/* PowerPC64 Local Dynamic TLS access. */
-# define TLS_LD(x) \
- ({ int * __result; \
- asm ( \
- " addi 3,2," #x "@got@tlsld\n" \
- " bl " __TLS_GET_ADDR "\n" \
- " nop \n" \
- " addis %0,3," #x "@dtprel@ha\n" \
- " addi %0,%0," #x "@dtprel@l\n" \
- : "=b" (__result) : \
- : "0", "3", "4", "5", "6", "7", \
- "8", "9", "10", "11", "12", \
- "lr", "ctr", \
- "cr0", "cr1", "cr5", "cr6", "cr7"); \
- __result; \
+# define TLS_LD(x) \
+ ({ int * __result; \
+ asm ("addi 3,2," #x "@got@tlsld\n\t" \
+ "bl " __TLS_GET_ADDR "\n\t" \
+ "nop \n\t" \
+ "addis %0,3," #x "@dtprel@ha\n\t" \
+ "addi %0,%0," #x "@dtprel@l" \
+ : "=b" (__result) : \
+ : "3", __TLS_CALL_CLOBBERS); \
+ __result; \
})
/* PowerPC64 General Dynamic TLS access. */
-# define TLS_GD(x) \
- ({ int * __result; \
- asm ( \
- " addi 3,2," #x "@got@tlsgd\n" \
- " bl " __TLS_GET_ADDR "\n" \
- " nop \n" \
- " mr %0,3\n" \
- : "=b" (__result) : \
- : "0", "3", "4", "5", "6", "7", \
- "8", "9", "10", "11", "12", \
- "lr", "ctr", \
- "cr0", "cr1", "cr5", "cr6", "cr7"); \
- __result; \
+# define TLS_GD(x) \
+ ({ register int *__result __asm__ ("r3"); \
+ asm ("addi 3,2," #x "@got@tlsgd\n\t" \
+ "bl " __TLS_GET_ADDR "\n\t" \
+ "nop " \
+ : "=r" (__result) : \
+ : __TLS_CALL_CLOBBERS); \
+ __result; \
})
+# endif
#elif !defined TLS_LE || !defined TLS_IE \
|| !defined TLS_LD || !defined TLS_GD
diff --git a/libc/hurd/hurd/ioctl.h b/libc/hurd/hurd/ioctl.h
index ee156f02f..e5ab3dc96 100644
--- a/libc/hurd/hurd/ioctl.h
+++ b/libc/hurd/hurd/ioctl.h
@@ -57,7 +57,7 @@ extern int hurd_register_ioctl_handler (int first_request, int last_request,
static const struct ioctl_handler handler##_ioctl_handler##moniker \
__attribute__ ((__unused__)) = \
{ _IOC_NOTYPE (first), _IOC_NOTYPE (last), \
- (int (*) (int, int, void *)) (handler), NULL }; \
+ (ioctl_handler_t) (handler), NULL }; \
text_set_element (_hurd_ioctl_handler_lists, \
handler##_ioctl_handler##moniker)
#define _HURD_HANDLE_IOCTLS(handler, first, last) \
diff --git a/libc/hurd/hurdioctl.c b/libc/hurd/hurdioctl.c
index 7c689841c..04d98629e 100644
--- a/libc/hurd/hurdioctl.c
+++ b/libc/hurd/hurdioctl.c
@@ -1,5 +1,5 @@
/* ioctl commands which must be done in the C library.
- Copyright (C) 1994,95,96,97,99,2001,2002,2009
+ Copyright (C) 1994,95,96,97,99,2001,2002,2009,2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -239,34 +239,40 @@ _hurd_setcttyid (mach_port_t cttyid)
}
-/* Make FD be the controlling terminal.
- This function is called for `ioctl (fd, TCIOSCTTY)'. */
-
-static int
-tiocsctty (int fd,
- int request) /* Always TIOCSCTTY. */
+static inline error_t
+do_tiocsctty (io_t port, io_t ctty)
{
mach_port_t cttyid;
error_t err;
- /* Get FD's cttyid port, unless it is already ours. */
- err = HURD_DPORT_USE (fd, ctty != MACH_PORT_NULL ? EADDRINUSE :
- __term_getctty (port, &cttyid));
- if (err == EADDRINUSE)
- /* FD is already the ctty. Nothing to do. */
+ if (ctty != MACH_PORT_NULL)
+ /* PORT is already the ctty. Nothing to do. */
return 0;
- else if (err)
- return __hurd_fail (err);
+
+ /* Get PORT's cttyid port. */
+ err = __term_getctty (port, &cttyid);
+ if (err)
+ return err;
/* Change the terminal's pgrp to ours. */
- err = HURD_DPORT_USE (fd, __tioctl_tiocspgrp (port, _hurd_pgrp));
+ err = __tioctl_tiocspgrp (port, _hurd_pgrp);
if (err)
- return __hurd_fail (err);
+ __mach_port_deallocate (__mach_task_self (), cttyid);
+ else
+ /* Make it our own. */
+ install_ctty (cttyid);
- /* Make it our own. */
- install_ctty (cttyid);
+ return err;
+}
- return 0;
+/* Make FD be the controlling terminal.
+ This function is called for `ioctl (fd, TCIOSCTTY)'. */
+
+static int
+tiocsctty (int fd,
+ int request) /* Always TIOCSCTTY. */
+{
+ return __hurd_fail (HURD_DPORT_USE (fd, do_tiocsctty (port, ctty)));
}
_HURD_HANDLE_IOCTL (tiocsctty, TIOCSCTTY);
diff --git a/libc/include/fenv.h b/libc/include/fenv.h
index 3aec7e52b..254162d45 100644
--- a/libc/include/fenv.h
+++ b/libc/include/fenv.h
@@ -13,6 +13,7 @@ extern int __fesetenv (__const fenv_t *__envp);
extern int __feupdateenv (__const fenv_t *__envp);
libm_hidden_proto (feraiseexcept)
+libm_hidden_proto (fegetenv)
libm_hidden_proto (fesetenv)
libm_hidden_proto (fesetround)
libm_hidden_proto (feholdexcept)
diff --git a/libc/io/ftw.c b/libc/io/ftw.c
index 9cc09077e..bb7dba8ca 100644
--- a/libc/io/ftw.c
+++ b/libc/io/ftw.c
@@ -1,5 +1,5 @@
/* File tree walker functions.
- Copyright (C) 1996-2004, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1996-2004, 2006-2008, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -790,6 +790,7 @@ ftw_startup (const char *dir, int is_nftw, void *func, int descriptors,
{
int save_err = errno;
__fchdir (cwdfd);
+ close_not_cancel_no_status (cwdfd);
__set_errno (save_err);
}
else if (cwd != NULL)
diff --git a/libc/libio/iovdprintf.c b/libc/libio/iovdprintf.c
index edab849a4..5284ff893 100644
--- a/libc/libio/iovdprintf.c
+++ b/libc/libio/iovdprintf.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995, 1997-2000, 2001, 2002, 2003, 2006
+/* Copyright (C) 1995, 1997-2000, 2001, 2002, 2003, 2006, 2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -60,6 +60,9 @@ _IO_vdprintf (d, format, arg)
done = INTUSE(_IO_vfprintf) (&tmpfil.file, format, arg);
+ if (done != EOF && _IO_do_flush (&tmpfil.file) == EOF)
+ done = EOF;
+
_IO_FINISH (&tmpfil.file);
return done;
diff --git a/libc/malloc/malloc.c b/libc/malloc/malloc.c
index b43e454f6..763852ea3 100644
--- a/libc/malloc/malloc.c
+++ b/libc/malloc/malloc.c
@@ -1,5 +1,5 @@
/* Malloc implementation for multiple threads without lock contention.
- Copyright (C) 1996-2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1996-2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Wolfram Gloger <wg@malloc.de>
and Doug Lea <dl@cs.oswego.edu>, 2001.
@@ -3933,9 +3933,10 @@ public_vALLOc(size_t bytes)
if(!p) {
/* Maybe the failure is due to running out of mmapped areas. */
if(ar_ptr != &main_arena) {
- (void)mutex_lock(&main_arena.mutex);
- p = _int_memalign(&main_arena, pagesz, bytes);
- (void)mutex_unlock(&main_arena.mutex);
+ ar_ptr = &main_arena;
+ (void)mutex_lock(&ar_ptr->mutex);
+ p = _int_memalign(ar_ptr, pagesz, bytes);
+ (void)mutex_unlock(&ar_ptr->mutex);
} else {
#if USE_ARENAS
/* ... or sbrk() has failed and there is still a chance to mmap() */
@@ -3978,9 +3979,10 @@ public_pVALLOc(size_t bytes)
if(!p) {
/* Maybe the failure is due to running out of mmapped areas. */
if(ar_ptr != &main_arena) {
- (void)mutex_lock(&main_arena.mutex);
- p = _int_memalign(&main_arena, pagesz, rounded_bytes);
- (void)mutex_unlock(&main_arena.mutex);
+ ar_ptr = &main_arena;
+ (void)mutex_lock(&ar_ptr->mutex);
+ p = _int_memalign(ar_ptr, pagesz, rounded_bytes);
+ (void)mutex_unlock(&ar_ptr->mutex);
} else {
#if USE_ARENAS
/* ... or sbrk() has failed and there is still a chance to mmap() */
diff --git a/libc/manual/charset.texi b/libc/manual/charset.texi
index 79854e50b..808469b8c 100644
--- a/libc/manual/charset.texi
+++ b/libc/manual/charset.texi
@@ -393,7 +393,7 @@ We already said above that the currently selected locale for the
by the functions we are about to describe. Each locale uses its own
character set (given as an argument to @code{localedef}) and this is the
one assumed as the external multibyte encoding. The wide character
-character set always is UCS-4, at least on GNU systems.
+set is always UCS-4, at least on GNU systems.
A characteristic of each multibyte character set is the maximum number
of bytes that can be necessary to represent one character. This
@@ -577,8 +577,8 @@ The @code{btowc} function was introduced in @w{Amendment 1} to @w{ISO C90}
and is declared in @file{wchar.h}.
@end deftypefun
-Despite the limitation that the single byte value always is interpreted
-in the initial state this function is actually useful most of the time.
+Despite the limitation that the single byte value is always interpreted
+in the initial state, this function is actually useful most of the time.
Most characters are either entirely single-byte character sets or they
are extension to ASCII. But then it is possible to write code like this
(not that this specific example is very useful):
@@ -607,10 +607,10 @@ that there is no guarantee that one can perform this kind of arithmetic
on the character of the character set used for @code{wchar_t}
representation. In other situations the bytes are not constant at
compile time and so the compiler cannot do the work. In situations like
-this it is necessary @code{btowc}.
+this, using @code{btowc} is required.
@noindent
-There also is a function for the conversion in the other direction.
+There is also a function for the conversion in the other direction.
@comment wchar.h
@comment ISO
diff --git a/libc/manual/errno.texi b/libc/manual/errno.texi
index 03a868e45..3b0af0c38 100644
--- a/libc/manual/errno.texi
+++ b/libc/manual/errno.texi
@@ -1425,7 +1425,7 @@ available on all systems implementing @w{ISO C}. But often the text
@code{perror} generates is not what is wanted and there is no way to
extend or change what @code{perror} does. The GNU coding standard, for
instance, requires error messages to be preceded by the program name and
-programs which read some input files should should provide information
+programs which read some input files should provide information
about the input file name and the line number in case an error is
encountered while reading the file. For these occasions there are two
functions available which are widely used throughout the GNU project.
diff --git a/libc/manual/getopt.texi b/libc/manual/getopt.texi
index 8c9bd20d6..77045157e 100644
--- a/libc/manual/getopt.texi
+++ b/libc/manual/getopt.texi
@@ -269,7 +269,7 @@ When @code{getopt_long} has no more options to handle, it returns
@var{argv} of the next remaining argument.
@end deftypefun
-Since long option names were used before before the @code{getopt_long}
+Since long option names were used before the @code{getopt_long}
options was invented there are program interfaces which require programs
to recognize options like @w{@samp{-option value}} instead of
@w{@samp{--option value}}. To enable these programs to use the GNU
diff --git a/libc/manual/math.texi b/libc/manual/math.texi
index 50e087c48..95e3378c9 100644
--- a/libc/manual/math.texi
+++ b/libc/manual/math.texi
@@ -1421,7 +1421,7 @@ pseudo-random number generator.
The GNU C library contains four additional functions which contain the
state as an explicit parameter and therefore make it possible to handle
-thread-local PRNGs. Beside this there are no difference. In fact, the
+thread-local PRNGs. Beside this there is no difference. In fact, the
four functions already discussed are implemented internally using the
following interfaces.
diff --git a/libc/manual/memory.texi b/libc/manual/memory.texi
index 43afc7bf9..59ea1ee34 100644
--- a/libc/manual/memory.texi
+++ b/libc/manual/memory.texi
@@ -2379,7 +2379,7 @@ exceed the process' data storage limit.
@c The Brk system call in Linux (as opposed to the GNU C Library function)
@c is considerably different. It always returns the new end of the data
@c segment, whether it succeeds or fails. The GNU C library Brk determines
-@c it's a failure if and only if if the system call returns an address less
+@c it's a failure if and only if the system call returns an address less
@c than the address requested.
@end deftypefun
diff --git a/libc/manual/message.texi b/libc/manual/message.texi
index e772b2de1..e44545a31 100644
--- a/libc/manual/message.texi
+++ b/libc/manual/message.texi
@@ -1466,7 +1466,7 @@ have this problem. But there is a very simple and powerful method to
handle these kind of problems with the @code{gettext} functions.
@noindent
-As as example consider the following fictional situation. A GUI program
+As an example consider the following fictional situation. A GUI program
has a menu bar with the following entries:
@smallexample
diff --git a/libc/manual/resource.texi b/libc/manual/resource.texi
index 4a814c9e4..05495722f 100644
--- a/libc/manual/resource.texi
+++ b/libc/manual/resource.texi
@@ -1288,7 +1288,7 @@ protected from concurrent accesses from different processors.
The POSIX standard up to this date is of not much help to solve this
problem. The Linux kernel provides a set of interfaces to allow
specifying @emph{affinity sets} for a process. The scheduler will
-schedule the thread or process on on CPUs specified by the affinity
+schedule the thread or process on CPUs specified by the affinity
masks. The interfaces which the GNU C library define follow to some
extend the Linux kernel interface.
diff --git a/libc/manual/stdio.texi b/libc/manual/stdio.texi
index 674851354..9fb209a47 100644
--- a/libc/manual/stdio.texi
+++ b/libc/manual/stdio.texi
@@ -574,7 +574,7 @@ operation itself is avoided. More importantly, functions like
introduction of threads) were implemented as macros which are very fast
if the buffer is not empty. With the addition of locking requirements
these functions are no longer implemented as macros since they would
-would expand to too much code.
+expand to too much code.
But these macros are still available with the same functionality under the new
names @code{putc_unlocked} and @code{getc_unlocked}. This possibly huge
difference of speed also suggests the use of the @code{_unlocked}
diff --git a/libc/manual/time.texi b/libc/manual/time.texi
index 393bccd99..f1f4254e9 100644
--- a/libc/manual/time.texi
+++ b/libc/manual/time.texi
@@ -972,7 +972,7 @@ This counter represents the number of calibration errors (caused by
large offsets or jitter).
@item long int stbcnt
-This counter denotes the number of of calibrations where the stability
+This counter denotes the number of calibrations where the stability
exceeded the threshold.
@end table
@end deftp
diff --git a/libc/math/fegetenv.c b/libc/math/fegetenv.c
index 4a878cc41..5b524307d 100644
--- a/libc/math/fegetenv.c
+++ b/libc/math/fegetenv.c
@@ -32,6 +32,7 @@ __fegetenv (fenv_t *envp)
strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, BP_SYM (__old_fegetenv), BP_SYM (fegetenv), GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, BP_SYM (__fegetenv), BP_SYM (fegetenv), GLIBC_2_2);
stub_warning (fegetenv)
diff --git a/libc/math/math_private.h b/libc/math/math_private.h
index fade7e118..e5ca61f0b 100644
--- a/libc/math/math_private.h
+++ b/libc/math/math_private.h
@@ -111,7 +111,7 @@ do { \
} while (0)
/* Get all in one, efficient on 64-bit machines. */
-#define INSERT_WORDS64(i,d) \
+#define INSERT_WORDS64(d,i) \
do { \
ieee_double_shape_type iw_u; \
iw_u.word = (i); \
diff --git a/libc/nptl/ChangeLog b/libc/nptl/ChangeLog
index c81eb03b7..f51ad4326 100644
--- a/libc/nptl/ChangeLog
+++ b/libc/nptl/ChangeLog
@@ -1,3 +1,28 @@
+2010-03-09 Ulrich Drepper <drepper@redhat.com>
+
+ * pthread_create.c (__pthread_create_2_1): If priorities are incorrect
+ and the call fails wake eventually waiting setxid threads. Don't free
+ stack here if we try starting a thread.
+ * sysdeps/pthread/createthread.c (do_clone): Only wake setxid waiter
+ if the clone call failed.
+
+2010-03-08 Andreas Schwab <schwab@redhat.com>
+
+ * pthread_create.c (__pthread_create_2_1): Don't set setxid_futex.
+ * allocatestack.c (get_cached_stack): Set setxid_futex.
+ (allocate_stack): Likewise.
+
+2010-03-05 Andreas Schwab <schwab@redhat.com>
+ Ulrich Drepper <drepper@redhat.com>
+
+ * allocatestack.c (setxid_mark_thread): Delay handling of thread if
+ it is creating a thread or it is just being created.
+ * pthread_create.c (start_thread): Wake setxid thread if it is
+ waiting.
+ (__pthread_create_2_1): Initialize setxid_futex.
+ * sysdeps/pthread/createthread.c (do_clone): Wake setxid thread if it
+ is waiting.
+
2010-01-15 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S:
diff --git a/libc/nptl/allocatestack.c b/libc/nptl/allocatestack.c
index 3c3585fe3..831e98e4c 100644
--- a/libc/nptl/allocatestack.c
+++ b/libc/nptl/allocatestack.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2007, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2007, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -213,6 +213,9 @@ get_cached_stack (size_t *sizep, void **memp)
return NULL;
}
+ /* Don't allow setxid until cloned. */
+ result->setxid_futex = -1;
+
/* Dequeue the entry. */
stack_list_del (&result->list);
@@ -380,7 +383,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
- TLS_TCB_SIZE - adj);
#elif TLS_DTV_AT_TP
pd = (struct pthread *) (((uintptr_t) attr->stackaddr
- - __static_tls_size - adj)
+ - __static_tls_size - adj)
- TLS_PRE_TCB_SIZE);
#endif
@@ -418,6 +421,9 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
/* The process ID is also the same as that of the caller. */
pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
+ /* Don't allow setxid until cloned. */
+ pd->setxid_futex = -1;
+
/* Allocate the DTV for this thread. */
if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
{
@@ -546,7 +552,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
#ifndef __ASSUME_PRIVATE_FUTEX
/* The thread must know when private futexes are supported. */
pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
- header.private_futex);
+ header.private_futex);
#endif
#ifdef NEED_DL_SYSINFO
@@ -554,6 +560,9 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
#endif
+ /* Don't allow setxid until cloned. */
+ pd->setxid_futex = -1;
+
/* The process ID is also the same as that of the caller. */
pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
@@ -969,6 +978,13 @@ setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
{
int ch;
+ /* Wait until this thread is cloned. */
+ if (t->setxid_futex == -1
+ && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
+ do
+ lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE);
+ while (t->setxid_futex == -2);
+
/* Don't let the thread exit before the setxid handler runs. */
t->setxid_futex = 0;
diff --git a/libc/nptl/pthread_create.c b/libc/nptl/pthread_create.c
index ddf377cdb..649cdae8f 100644
--- a/libc/nptl/pthread_create.c
+++ b/libc/nptl/pthread_create.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2007,2008,2009 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2007,2008,2009,2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -242,6 +242,10 @@ start_thread (void *arg)
__resp = &pd->res;
#endif
+ /* Allow setxid from now onwards. */
+ if (__builtin_expect (atomic_exchange_acq (&pd->setxid_futex, 0) == -2, 0))
+ lll_futex_wake (&pd->setxid_futex, 1, LLL_PRIVATE);
+
#ifdef __NR_set_robust_list
# ifndef __ASSUME_SET_ROBUST_LIST
if (__set_robust_list_avail >= 0)
@@ -538,33 +542,23 @@ __pthread_create_2_1 (newthread, attr, start_routine, arg)
if (pd->schedparam.sched_priority < minprio
|| pd->schedparam.sched_priority > maxprio)
{
- err = EINVAL;
- goto errout;
+ /* Perhaps a thread wants to change the IDs and if waiting
+ for this stillborn thread. */
+ if (__builtin_expect (atomic_exchange_acq (&pd->setxid_futex, 0)
+ == -2, 0))
+ lll_futex_wake (&pd->setxid_futex, 1, LLL_PRIVATE);
+
+ __deallocate_stack (pd);
+
+ return EINVAL;
}
}
/* Pass the descriptor to the caller. */
*newthread = (pthread_t) pd;
- /* Remember whether the thread is detached or not. In case of an
- error we have to free the stacks of non-detached stillborn
- threads. */
- bool is_detached = IS_DETACHED (pd);
-
/* Start the thread. */
- err = create_thread (pd, iattr, STACK_VARIABLES_ARGS);
- if (err != 0)
- {
- /* Something went wrong. Free the resources. */
- if (!is_detached)
- {
- errout:
- __deallocate_stack (pd);
- }
- return err;
- }
-
- return 0;
+ return create_thread (pd, iattr, STACK_VARIABLES_ARGS);
}
versioned_symbol (libpthread, __pthread_create_2_1, pthread_create, GLIBC_2_1);
diff --git a/libc/nptl/sysdeps/pthread/createthread.c b/libc/nptl/sysdeps/pthread/createthread.c
index 66fafe805..8d96387a9 100644
--- a/libc/nptl/sysdeps/pthread/createthread.c
+++ b/libc/nptl/sysdeps/pthread/createthread.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2007, 2008, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -28,7 +28,7 @@
#include "kernel-features.h"
-#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD)
+#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD)
/* Unless otherwise specified, the thread "register" is going to be
initialized with a pointer to the TCB. */
@@ -72,16 +72,20 @@ do_clone (struct pthread *pd, const struct pthread_attr *attr,
that cares whether the thread count is correct. */
atomic_increment (&__nptl_nthreads);
- if (ARCH_CLONE (fct, STACK_VARIABLES_ARGS, clone_flags,
- pd, &pd->tid, TLS_VALUE, &pd->tid) == -1)
+ int rc = ARCH_CLONE (fct, STACK_VARIABLES_ARGS, clone_flags,
+ pd, &pd->tid, TLS_VALUE, &pd->tid);
+
+ if (__builtin_expect (rc == -1, 0))
{
atomic_decrement (&__nptl_nthreads); /* Oops, we lied for a second. */
- /* Failed. If the thread is detached, remove the TCB here since
- the caller cannot do this. The caller remembered the thread
- as detached and cannot reverify that it is not since it must
- not access the thread descriptor again. */
- if (IS_DETACHED (pd))
+ /* Perhaps a thread wants to change the IDs and if waiting
+ for this stillborn thread. */
+ if (__builtin_expect (atomic_exchange_acq (&pd->setxid_futex, 0)
+ == -2, 0))
+ lll_futex_wake (&pd->setxid_futex, 1, LLL_PRIVATE);
+
+ /* Free the resources. */
__deallocate_stack (pd);
/* We have to translate error codes. */
@@ -114,6 +118,9 @@ do_clone (struct pthread *pd, const struct pthread_attr *attr,
(void) INTERNAL_SYSCALL (tkill, err2, 2, pd->tid, SIGCANCEL);
#endif
+ /* We do not free the stack here because the canceled thread
+ itself will do this. */
+
return (INTERNAL_SYSCALL_ERROR_P (res, err)
? INTERNAL_SYSCALL_ERRNO (res, err)
: 0);
diff --git a/libc/resolv/res_send.c b/libc/resolv/res_send.c
index 28a47e42b..b0966ae03 100644
--- a/libc/resolv/res_send.c
+++ b/libc/resolv/res_send.c
@@ -490,6 +490,9 @@ __libc_res_nsend(res_state statp, const u_char *buf, int buflen,
for (try = 0; try < statp->retry; try++) {
for (ns = 0; ns < MAXNS; ns++)
{
+#ifdef DEBUG
+ char tmpbuf[40];
+#endif
struct sockaddr_in6 *nsap = EXT(statp).nsaddrs[ns];
if (nsap == NULL)
@@ -530,9 +533,6 @@ __libc_res_nsend(res_state statp, const u_char *buf, int buflen,
}
#endif
-#ifdef DEBUG
- char tmpbuf[40];
-#endif
Dprint(statp->options & RES_DEBUG,
(stdout, ";; Querying server (# %d) address = %s\n",
ns + 1, inet_ntop(AF_INET6, &nsap->sin6_addr,
@@ -575,11 +575,12 @@ __libc_res_nsend(res_state statp, const u_char *buf, int buflen,
(statp->pfcode & RES_PRF_REPLY),
(stdout, "%s", ""),
ans, (resplen > anssiz) ? anssiz : resplen);
- if (buf2 != NULL)
+ if (buf2 != NULL) {
DprintQ((statp->options & RES_DEBUG) ||
(statp->pfcode & RES_PRF_REPLY),
(stdout, "%s", ""),
*ansp2, (*resplen2 > *nansp2) ? *nansp2 : *resplen2);
+ }
/*
* If we have temporarily opened a virtual circuit,
@@ -883,7 +884,7 @@ send_vc(res_state statp,
(statp->pfcode & RES_PRF_REPLY),
(stdout, ";; old answer (unexpected):\n"),
*thisansp,
- (rlen > *thisanssiz) ? *thisanssiz: rlen);
+ (rlen > *thisanssizp) ? *thisanssizp: rlen);
goto read_len;
}
@@ -1186,7 +1187,7 @@ send_dg(res_state statp,
*/
Dprint(statp->options & RES_DEBUG,
(stdout, ";; undersized: %d\n",
- *thisresplen));
+ *thisresplenp));
*terrno = EMSGSIZE;
goto err_out;
}
@@ -1201,8 +1202,8 @@ send_dg(res_state statp,
(statp->pfcode & RES_PRF_REPLY),
(stdout, ";; old answer:\n"),
thisansp,
- (*thisresplen > *thisanssiz)
- ? *thisanssiz : *thisresplen);
+ (*thisresplenp > *thisanssizp)
+ ? *thisanssizp : *thisresplenp);
goto wait;
}
if (!(statp->options & RES_INSECURE1) &&
@@ -1216,8 +1217,8 @@ send_dg(res_state statp,
(statp->pfcode & RES_PRF_REPLY),
(stdout, ";; not our server:\n"),
thisansp,
- (*thisresplen > *thisanssiz)
- ? *thisanssiz : *thisresplen);
+ (*thisresplenp > *thisanssizp)
+ ? *thisanssizp : *thisresplenp);
goto wait;
}
#ifdef RES_USE_EDNS0
@@ -1232,9 +1233,9 @@ send_dg(res_state statp,
DprintQ(statp->options & RES_DEBUG,
(stdout,
"server rejected query with EDNS0:\n"),
- thisans,
- (*thisresplen > *thisanssiz)
- ? *thisanssiz : *thisresplen);
+ thisansp,
+ (*thisresplenp > *thisanssizp)
+ ? *thisanssizp : *thisresplenp);
/* record the error */
statp->_flags |= RES_F_EDNS0ERR;
goto err_out;
@@ -1258,8 +1259,8 @@ send_dg(res_state statp,
(statp->pfcode & RES_PRF_REPLY),
(stdout, ";; wrong query name:\n"),
thisansp,
- (*thisresplen > *thisanssiz)
- ? *thisanssiz : *thisresplen);
+ (*thisresplenp > *thisanssizp)
+ ? *thisanssizp : *thisresplenp);
goto wait;
}
if (anhp->rcode == SERVFAIL ||
@@ -1268,8 +1269,8 @@ send_dg(res_state statp,
DprintQ(statp->options & RES_DEBUG,
(stdout, "server rejected query:\n"),
thisansp,
- (*thisresplen > *thisanssiz)
- ? *thisanssiz : *thisresplen);
+ (*thisresplenp > *thisanssizp)
+ ? *thisanssizp : *thisresplenp);
if (recvresp1 || (buf2 != NULL && recvresp2))
return resplen;
@@ -1295,8 +1296,8 @@ send_dg(res_state statp,
DprintQ(statp->options & RES_DEBUG,
(stdout, "referred query:\n"),
thisansp,
- (*thisresplen > *thisanssiz)
- ? *thisanssiz : *thisresplen);
+ (*thisresplenp > *thisanssizp)
+ ? *thisanssizp : *thisresplenp);
goto next_ns;
}
if (!(statp->options & RES_IGNTC) && anhp->tc) {
diff --git a/libc/sysdeps/i386/configure b/libc/sysdeps/i386/configure
index f0c2758a5..7814b3b31 100755
--- a/libc/sysdeps/i386/configure
+++ b/libc/sysdeps/i386/configure
@@ -637,3 +637,22 @@ if test $libc_cv_cc_sse4 = yes; then
_ACEOF
fi
+
+{ $as_echo "$as_me:$LINENO: checking for assembler -mtune=i686 support" >&5
+$as_echo_n "checking for assembler -mtune=i686 support... " >&6; }
+if test "${libc_cv_as_i686+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ if { ac_try='${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null'
+ { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ libc_cv_as_i686=yes
+else
+ libc_cv_as_i686=no
+fi
+fi
+{ $as_echo "$as_me:$LINENO: result: $libc_cv_as_i686" >&5
+$as_echo "$libc_cv_as_i686" >&6; }
diff --git a/libc/sysdeps/i386/configure.in b/libc/sysdeps/i386/configure.in
index 12dceaf84..9fc7fa59f 100644
--- a/libc/sysdeps/i386/configure.in
+++ b/libc/sysdeps/i386/configure.in
@@ -47,3 +47,11 @@ fi])
if test $libc_cv_cc_sse4 = yes; then
AC_DEFINE(HAVE_SSE4_SUPPORT)
fi
+
+dnl Check if -Wa,-mtune=i686 works.
+AC_CACHE_CHECK(for assembler -mtune=i686 support, libc_cv_as_i686, [dnl
+if AC_TRY_COMMAND([${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null]); then
+ libc_cv_as_i686=yes
+else
+ libc_cv_as_i686=no
+fi])
diff --git a/libc/sysdeps/i386/fpu/fegetenv.c b/libc/sysdeps/i386/fpu/fegetenv.c
index fb955cf56..ddb67e5d8 100644
--- a/libc/sysdeps/i386/fpu/fegetenv.c
+++ b/libc/sysdeps/i386/fpu/fegetenv.c
@@ -40,4 +40,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, BP_SYM (__old_fegetenv), BP_SYM (fegetenv), GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, BP_SYM (__fegetenv), BP_SYM (fegetenv), GLIBC_2_2);
diff --git a/libc/sysdeps/i386/i686/Makefile b/libc/sysdeps/i386/i686/Makefile
index dbcf1c33d..e6b292458 100644
--- a/libc/sysdeps/i386/i686/Makefile
+++ b/libc/sysdeps/i386/i686/Makefile
@@ -9,3 +9,19 @@ stack-align-test-flags += -msse
ifeq ($(subdir),string)
sysdep_routines += cacheinfo
endif
+
+ifeq (yes,$(config-asflags-i686))
+CFLAGS-.o += -Wa,-mtune=i686
+CFLAGS-.os += -Wa,-mtune=i686
+CFLAGS-.op += -Wa,-mtune=i686
+CFLAGS-.og += -Wa,-mtune=i686
+CFLAGS-.ob += -Wa,-mtune=i686
+CFLAGS-.oS += -Wa,-mtune=i686
+
+ASFLAGS-.o += -Wa,-mtune=i686
+ASFLAGS-.os += -Wa,-mtune=i686
+ASFLAGS-.op += -Wa,-mtune=i686
+ASFLAGS-.og += -Wa,-mtune=i686
+ASFLAGS-.ob += -Wa,-mtune=i686
+ASFLAGS-.oS += -Wa,-mtune=i686
+endif
diff --git a/libc/sysdeps/i386/i686/multiarch/Makefile b/libc/sysdeps/i386/i686/multiarch/Makefile
index fbad9ae73..e8847d6fc 100644
--- a/libc/sysdeps/i386/i686/multiarch/Makefile
+++ b/libc/sysdeps/i386/i686/multiarch/Makefile
@@ -7,7 +7,9 @@ ifeq ($(subdir),string)
sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
- memset-sse2-rep bzero-sse2-rep
+ memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
+ strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
+ memcmp-ssse3 memcmp-sse4
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-strcspn-c.c += -msse4
diff --git a/libc/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/libc/sysdeps/i386/i686/multiarch/memcmp-sse4.S
new file mode 100644
index 000000000..b1ed778f1
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -0,0 +1,1004 @@
+/* memcmp with SSE4.2
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef MEMCMP
+# define MEMCMP __memcmp_sse4_2
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define PARMS 4
+#define BLK1 PARMS
+#define BLK2 BLK1+4
+#define LEN BLK2+4
+#define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
+
+
+#ifdef SHARED
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register contains the
+ index into the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ call __i686.get_pc_thunk.bx; \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table and adjuested EDX/ESI. Go. */ \
+ jmp *%ebx
+
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ ALIGN (4)
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+#else
+# define JMPTBL(I, B) I
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register contains the
+ index into the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
+
+ .section .text.sse4.2,"ax",@progbits
+ENTRY (MEMCMP)
+ movl BLK1(%esp), %eax
+ movl BLK2(%esp), %edx
+ movl LEN(%esp), %ecx
+ cmp $1, %ecx
+ jbe L(less1bytes)
+ pxor %xmm0, %xmm0
+ cmp $64, %ecx
+ ja L(64bytesormore)
+ cmp $8, %ecx
+ PUSH (%ebx)
+ jb L(less8bytes)
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
+
+ ALIGN (4)
+L(less8bytes):
+ mov (%eax), %bl
+ cmpb (%edx), %bl
+ jne L(nonzero)
+
+ mov 1(%eax), %bl
+ cmpb 1(%edx), %bl
+ jne L(nonzero)
+
+ cmp $2, %ecx
+ jz L(0bytes)
+
+ mov 2(%eax), %bl
+ cmpb 2(%edx), %bl
+ jne L(nonzero)
+
+ cmp $3, %ecx
+ jz L(0bytes)
+
+ mov 3(%eax), %bl
+ cmpb 3(%edx), %bl
+ jne L(nonzero)
+
+ cmp $4, %ecx
+ jz L(0bytes)
+
+ mov 4(%eax), %bl
+ cmpb 4(%edx), %bl
+ jne L(nonzero)
+
+ cmp $5, %ecx
+ jz L(0bytes)
+
+ mov 5(%eax), %bl
+ cmpb 5(%edx), %bl
+ jne L(nonzero)
+
+ cmp $6, %ecx
+ jz L(0bytes)
+
+ mov 6(%eax), %bl
+ cmpb 6(%edx), %bl
+ je L(0bytes)
+L(nonzero):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(above)
+ neg %eax
+L(above):
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(0bytes):
+ POP (%ebx)
+ xor %eax, %eax
+ ret
+
+ ALIGN (4)
+L(less1bytes):
+ jb L(0bytesend)
+ movzbl (%eax), %eax
+ movzbl (%edx), %edx
+ sub %edx, %eax
+ ret
+
+ ALIGN (4)
+L(0bytesend):
+ xor %eax, %eax
+ ret
+
+ ALIGN (4)
+L(64bytesormore):
+ PUSH (%ebx)
+ mov %ecx, %ebx
+ mov $64, %ecx
+ sub $64, %ebx
+L(64bytesormore_loop):
+ movdqu (%eax), %xmm1
+ movdqu (%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_16diff)
+
+ movdqu 16(%eax), %xmm1
+ movdqu 16(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_32diff)
+
+ movdqu 32(%eax), %xmm1
+ movdqu 32(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_48diff)
+
+ movdqu 48(%eax), %xmm1
+ movdqu 48(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(find_64diff)
+ add %ecx, %eax
+ add %ecx, %edx
+ sub %ecx, %ebx
+ jae L(64bytesormore_loop)
+ add %ebx, %ecx
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
+
+ ALIGN (4)
+L(find_16diff):
+ sub $16, %ecx
+L(find_32diff):
+ sub $16, %ecx
+L(find_48diff):
+ sub $16, %ecx
+L(find_64diff):
+ add %ecx, %edx
+ add %ecx, %eax
+ jmp L(16bytes)
+
+ ALIGN (4)
+L(16bytes):
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(49bytes):
+ movdqu -49(%eax), %xmm1
+ movdqu -49(%edx), %xmm2
+ mov $-49, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(33bytes):
+ movdqu -33(%eax), %xmm1
+ movdqu -33(%edx), %xmm2
+ mov $-33, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(17bytes):
+ mov -17(%eax), %ecx
+ mov -17(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(13bytes):
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(9bytes):
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(5bytes):
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(50bytes):
+ mov $-50, %ebx
+ movdqu -50(%eax), %xmm1
+ movdqu -50(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(34bytes):
+ mov $-34, %ebx
+ movdqu -34(%eax), %xmm1
+ movdqu -34(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(18bytes):
+ mov -18(%eax), %ecx
+ mov -18(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(14bytes):
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(10bytes):
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(6bytes):
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(2bytes):
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(51bytes):
+ mov $-51, %ebx
+ movdqu -51(%eax), %xmm1
+ movdqu -51(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(35bytes):
+ mov $-35, %ebx
+ movdqu -35(%eax), %xmm1
+ movdqu -35(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(19bytes):
+ movl -19(%eax), %ecx
+ movl -19(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(15bytes):
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(11bytes):
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(7bytes):
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(3bytes):
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+L(1bytes):
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(52bytes):
+ movdqu -52(%eax), %xmm1
+ movdqu -52(%edx), %xmm2
+ mov $-52, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(36bytes):
+ movdqu -36(%eax), %xmm1
+ movdqu -36(%edx), %xmm2
+ mov $-36, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(20bytes):
+ movdqu -20(%eax), %xmm1
+ movdqu -20(%edx), %xmm2
+ mov $-20, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(53bytes):
+ movdqu -53(%eax), %xmm1
+ movdqu -53(%edx), %xmm2
+ mov $-53, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(37bytes):
+ mov $-37, %ebx
+ movdqu -37(%eax), %xmm1
+ movdqu -37(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(21bytes):
+ mov $-21, %ebx
+ movdqu -21(%eax), %xmm1
+ movdqu -21(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(54bytes):
+ movdqu -54(%eax), %xmm1
+ movdqu -54(%edx), %xmm2
+ mov $-54, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(38bytes):
+ mov $-38, %ebx
+ movdqu -38(%eax), %xmm1
+ movdqu -38(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(22bytes):
+ mov $-22, %ebx
+ movdqu -22(%eax), %xmm1
+ movdqu -22(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(55bytes):
+ movdqu -55(%eax), %xmm1
+ movdqu -55(%edx), %xmm2
+ mov $-55, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(39bytes):
+ mov $-39, %ebx
+ movdqu -39(%eax), %xmm1
+ movdqu -39(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(23bytes):
+ mov $-23, %ebx
+ movdqu -23(%eax), %xmm1
+ movdqu -23(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(56bytes):
+ movdqu -56(%eax), %xmm1
+ movdqu -56(%edx), %xmm2
+ mov $-56, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(40bytes):
+ mov $-40, %ebx
+ movdqu -40(%eax), %xmm1
+ movdqu -40(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(24bytes):
+ mov $-24, %ebx
+ movdqu -24(%eax), %xmm1
+ movdqu -24(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(57bytes):
+ movdqu -57(%eax), %xmm1
+ movdqu -57(%edx), %xmm2
+ mov $-57, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(41bytes):
+ mov $-41, %ebx
+ movdqu -41(%eax), %xmm1
+ movdqu -41(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(25bytes):
+ mov $-25, %ebx
+ movdqu -25(%eax), %xmm1
+ movdqu -25(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(58bytes):
+ movdqu -58(%eax), %xmm1
+ movdqu -58(%edx), %xmm2
+ mov $-58, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(42bytes):
+ mov $-42, %ebx
+ movdqu -42(%eax), %xmm1
+ movdqu -42(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(26bytes):
+ mov $-26, %ebx
+ movdqu -26(%eax), %xmm1
+ movdqu -26(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(59bytes):
+ movdqu -59(%eax), %xmm1
+ movdqu -59(%edx), %xmm2
+ mov $-59, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(43bytes):
+ mov $-43, %ebx
+ movdqu -43(%eax), %xmm1
+ movdqu -43(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(27bytes):
+ mov $-27, %ebx
+ movdqu -27(%eax), %xmm1
+ movdqu -27(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(60bytes):
+ movdqu -60(%eax), %xmm1
+ movdqu -60(%edx), %xmm2
+ mov $-60, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(44bytes):
+ mov $-44, %ebx
+ movdqu -44(%eax), %xmm1
+ movdqu -44(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(28bytes):
+ mov $-28, %ebx
+ movdqu -28(%eax), %xmm1
+ movdqu -28(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(61bytes):
+ movdqu -61(%eax), %xmm1
+ movdqu -61(%edx), %xmm2
+ mov $-61, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(45bytes):
+ mov $-45, %ebx
+ movdqu -45(%eax), %xmm1
+ movdqu -45(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(29bytes):
+ mov $-29, %ebx
+ movdqu -29(%eax), %xmm1
+ movdqu -29(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(62bytes):
+ movdqu -62(%eax), %xmm1
+ movdqu -62(%edx), %xmm2
+ mov $-62, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(46bytes):
+ mov $-46, %ebx
+ movdqu -46(%eax), %xmm1
+ movdqu -46(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(30bytes):
+ mov $-30, %ebx
+ movdqu -30(%eax), %xmm1
+ movdqu -30(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(63bytes):
+ movdqu -63(%eax), %xmm1
+ movdqu -63(%edx), %xmm2
+ mov $-63, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(47bytes):
+ mov $-47, %ebx
+ movdqu -47(%eax), %xmm1
+ movdqu -47(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(31bytes):
+ mov $-31, %ebx
+ movdqu -31(%eax), %xmm1
+ movdqu -31(%edx), %xmm2
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ RETURN
+
+ ALIGN (4)
+L(64bytes):
+ movdqu -64(%eax), %xmm1
+ movdqu -64(%edx), %xmm2
+ mov $-64, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(48bytes):
+ movdqu -48(%eax), %xmm1
+ movdqu -48(%edx), %xmm2
+ mov $-48, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+L(32bytes):
+ movdqu -32(%eax), %xmm1
+ movdqu -32(%edx), %xmm2
+ mov $-32, %ebx
+ pxor %xmm1, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(less16bytes):
+ add %ebx, %eax
+ add %ebx, %edx
+
+ mov (%eax), %ecx
+ mov (%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 4(%eax), %ecx
+ mov 4(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 8(%eax), %ecx
+ mov 8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+
+ mov 12(%eax), %ecx
+ mov 12(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ RETURN
+
+ ALIGN (4)
+L(find_diff):
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ shr $16,%ecx
+ shr $16,%ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+L(end):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(bigger)
+ neg %eax
+L(bigger):
+ ret
+END (MEMCMP)
+
+ .section .rodata.sse4.2,"a",@progbits
+ ALIGN (2)
+ .type L(table_64bytes), @object
+L(table_64bytes):
+ .int JMPTBL (L(0bytes), L(table_64bytes))
+ .int JMPTBL (L(1bytes), L(table_64bytes))
+ .int JMPTBL (L(2bytes), L(table_64bytes))
+ .int JMPTBL (L(3bytes), L(table_64bytes))
+ .int JMPTBL (L(4bytes), L(table_64bytes))
+ .int JMPTBL (L(5bytes), L(table_64bytes))
+ .int JMPTBL (L(6bytes), L(table_64bytes))
+ .int JMPTBL (L(7bytes), L(table_64bytes))
+ .int JMPTBL (L(8bytes), L(table_64bytes))
+ .int JMPTBL (L(9bytes), L(table_64bytes))
+ .int JMPTBL (L(10bytes), L(table_64bytes))
+ .int JMPTBL (L(11bytes), L(table_64bytes))
+ .int JMPTBL (L(12bytes), L(table_64bytes))
+ .int JMPTBL (L(13bytes), L(table_64bytes))
+ .int JMPTBL (L(14bytes), L(table_64bytes))
+ .int JMPTBL (L(15bytes), L(table_64bytes))
+ .int JMPTBL (L(16bytes), L(table_64bytes))
+ .int JMPTBL (L(17bytes), L(table_64bytes))
+ .int JMPTBL (L(18bytes), L(table_64bytes))
+ .int JMPTBL (L(19bytes), L(table_64bytes))
+ .int JMPTBL (L(20bytes), L(table_64bytes))
+ .int JMPTBL (L(21bytes), L(table_64bytes))
+ .int JMPTBL (L(22bytes), L(table_64bytes))
+ .int JMPTBL (L(23bytes), L(table_64bytes))
+ .int JMPTBL (L(24bytes), L(table_64bytes))
+ .int JMPTBL (L(25bytes), L(table_64bytes))
+ .int JMPTBL (L(26bytes), L(table_64bytes))
+ .int JMPTBL (L(27bytes), L(table_64bytes))
+ .int JMPTBL (L(28bytes), L(table_64bytes))
+ .int JMPTBL (L(29bytes), L(table_64bytes))
+ .int JMPTBL (L(30bytes), L(table_64bytes))
+ .int JMPTBL (L(31bytes), L(table_64bytes))
+ .int JMPTBL (L(32bytes), L(table_64bytes))
+ .int JMPTBL (L(33bytes), L(table_64bytes))
+ .int JMPTBL (L(34bytes), L(table_64bytes))
+ .int JMPTBL (L(35bytes), L(table_64bytes))
+ .int JMPTBL (L(36bytes), L(table_64bytes))
+ .int JMPTBL (L(37bytes), L(table_64bytes))
+ .int JMPTBL (L(38bytes), L(table_64bytes))
+ .int JMPTBL (L(39bytes), L(table_64bytes))
+ .int JMPTBL (L(40bytes), L(table_64bytes))
+ .int JMPTBL (L(41bytes), L(table_64bytes))
+ .int JMPTBL (L(42bytes), L(table_64bytes))
+ .int JMPTBL (L(43bytes), L(table_64bytes))
+ .int JMPTBL (L(44bytes), L(table_64bytes))
+ .int JMPTBL (L(45bytes), L(table_64bytes))
+ .int JMPTBL (L(46bytes), L(table_64bytes))
+ .int JMPTBL (L(47bytes), L(table_64bytes))
+ .int JMPTBL (L(48bytes), L(table_64bytes))
+ .int JMPTBL (L(49bytes), L(table_64bytes))
+ .int JMPTBL (L(50bytes), L(table_64bytes))
+ .int JMPTBL (L(51bytes), L(table_64bytes))
+ .int JMPTBL (L(52bytes), L(table_64bytes))
+ .int JMPTBL (L(53bytes), L(table_64bytes))
+ .int JMPTBL (L(54bytes), L(table_64bytes))
+ .int JMPTBL (L(55bytes), L(table_64bytes))
+ .int JMPTBL (L(56bytes), L(table_64bytes))
+ .int JMPTBL (L(57bytes), L(table_64bytes))
+ .int JMPTBL (L(58bytes), L(table_64bytes))
+ .int JMPTBL (L(59bytes), L(table_64bytes))
+ .int JMPTBL (L(60bytes), L(table_64bytes))
+ .int JMPTBL (L(61bytes), L(table_64bytes))
+ .int JMPTBL (L(62bytes), L(table_64bytes))
+ .int JMPTBL (L(63bytes), L(table_64bytes))
+ .int JMPTBL (L(64bytes), L(table_64bytes))
+ .size L(table_64bytes), .-L(table_64bytes)
+#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/memcmp-ssse3.S b/libc/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
new file mode 100644
index 000000000..d2f852f72
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/memcmp-ssse3.S
@@ -0,0 +1,1966 @@
+/* memcmp with SSSE3
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef MEMCMP
+# define MEMCMP __memcmp_ssse3
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define PARMS 4
+#define BLK1 PARMS
+#define BLK2 BLK1+4
+#define LEN BLK2+4
+#define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
+#define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (MEMCMP)
+ movl LEN(%esp), %ecx
+ movl BLK1(%esp), %eax
+ cmp $48, %ecx
+ movl BLK2(%esp), %edx
+ jae L(48bytesormore)
+ cmp $1, %ecx
+ jbe L(less1bytes)
+ PUSH (%ebx)
+ add %ecx, %edx
+ add %ecx, %eax
+ jmp L(less48bytes)
+
+ ALIGN (4)
+ CFI_POP (%ebx)
+L(less1bytes):
+ jb L(zero)
+ movb (%eax), %cl
+ cmp (%edx), %cl
+ je L(zero)
+ mov $1, %eax
+ ja L(1bytesend)
+ neg %eax
+L(1bytesend):
+ ret
+
+ ALIGN (4)
+L(zero):
+ mov $0, %eax
+ ret
+
+ ALIGN (4)
+L(48bytesormore):
+ PUSH (%ebx)
+ PUSH (%esi)
+ PUSH (%edi)
+ cfi_remember_state
+ movdqu (%eax), %xmm3
+ movdqu (%edx), %xmm0
+ movl %eax, %edi
+ movl %edx, %esi
+ pcmpeqb %xmm0, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 16(%edi), %edi
+
+ sub $0xffff, %edx
+ lea 16(%esi), %esi
+ jnz L(less16bytes)
+ mov %edi, %edx
+ and $0xf, %edx
+ xor %edx, %edi
+ sub %edx, %esi
+ add %edx, %ecx
+ mov %esi, %edx
+ and $0xf, %edx
+ jz L(shr_0)
+ xor %edx, %esi
+
+ cmp $8, %edx
+ jae L(next_unaligned_table)
+ cmp $0, %edx
+ je L(shr_0)
+ cmp $1, %edx
+ je L(shr_1)
+ cmp $2, %edx
+ je L(shr_2)
+ cmp $3, %edx
+ je L(shr_3)
+ cmp $4, %edx
+ je L(shr_4)
+ cmp $5, %edx
+ je L(shr_5)
+ cmp $6, %edx
+ je L(shr_6)
+ jmp L(shr_7)
+
+ ALIGN (4)
+L(next_unaligned_table):
+ cmp $8, %edx
+ je L(shr_8)
+ cmp $9, %edx
+ je L(shr_9)
+ cmp $10, %edx
+ je L(shr_10)
+ cmp $11, %edx
+ je L(shr_11)
+ cmp $12, %edx
+ je L(shr_12)
+ cmp $13, %edx
+ je L(shr_13)
+ cmp $14, %edx
+ je L(shr_14)
+ jmp L(shr_15)
+
+ ALIGN (4)
+L(shr_0):
+ cmp $80, %ecx
+ jae L(shr_0_gobble)
+ lea -48(%ecx), %ecx
+ xor %eax, %eax
+ movaps (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+ movaps 16(%esi), %xmm2
+ pcmpeqb 16(%edi), %xmm2
+ pand %xmm1, %xmm2
+ pmovmskb %xmm2, %edx
+ add $32, %edi
+ add $32, %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea (%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_0_gobble):
+ lea -48(%ecx), %ecx
+ movdqa (%esi), %xmm0
+ xor %eax, %eax
+ pcmpeqb (%edi), %xmm0
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm2
+ pcmpeqb 16(%edi), %xmm2
+L(shr_0_gobble_loop):
+ pand %xmm0, %xmm2
+ sub $32, %ecx
+ pmovmskb %xmm2, %edx
+ movdqa %xmm0, %xmm1
+ movdqa 32(%esi), %xmm0
+ movdqa 48(%esi), %xmm2
+ sbb $0xffff, %edx
+ pcmpeqb 32(%edi), %xmm0
+ pcmpeqb 48(%edi), %xmm2
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ jz L(shr_0_gobble_loop)
+
+ pand %xmm0, %xmm2
+ cmp $0, %ecx
+ jge L(shr_0_gobble_loop_next)
+ inc %edx
+ add $32, %ecx
+L(shr_0_gobble_loop_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm2, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea (%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_1):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_1_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $1,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $1,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 1(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_1_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $1,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $1,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_1_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $1,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $1,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_1_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_1_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_1_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 1(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_2):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_2_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $2,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $2,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 2(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_2_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $2,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $2,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_2_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $2,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $2,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_2_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_2_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_2_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 2(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_3):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_3_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $3,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $3,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 3(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_3_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $3,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $3,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_3_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $3,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $3,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_3_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_3_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_3_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 3(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_4):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_4_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $4,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $4,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 4(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_4_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $4,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $4,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_4_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $4,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $4,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_4_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_4_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_4_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 4(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_5):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_5_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $5,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $5,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 5(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_5_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $5,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $5,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_5_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $5,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $5,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_5_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_5_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_5_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 5(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_6):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_6_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $6,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $6,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 6(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_6_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $6,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $6,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_6_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $6,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $6,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_6_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_6_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_6_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 6(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_7):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_7_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $7,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $7,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 7(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_7_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $7,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $7,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_7_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $7,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $7,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_7_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_7_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_7_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 7(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_8):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_8_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $8,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $8,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 8(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_8_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $8,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $8,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_8_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $8,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $8,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_8_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_8_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_8_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 8(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_9):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_9_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $9,(%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $9,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 9(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_9_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $9,(%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $9,16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_9_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $9,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $9,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_9_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_9_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_9_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 9(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_10):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_10_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $10, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $10,%xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 10(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_10_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $10, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $10, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_10_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $10,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $10,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_10_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_10_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_10_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 10(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_11):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_11_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $11, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $11, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 11(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_11_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $11, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $11, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_11_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $11,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $11,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_11_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_11_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_11_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 11(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_12):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_12_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $12, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $12, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 12(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_12_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $12, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $12, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_12_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $12,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $12,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_12_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_12_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_12_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 12(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_13):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_13_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $13, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $13, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 13(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_13_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $13, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $13, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_13_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $13,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $13,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_13_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_13_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_13_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 13(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_14):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_14_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $14, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $14, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 14(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_14_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $14, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $14, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_14_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $14,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $14,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_14_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_14_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_14_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 14(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_15):
+ cmp $80, %ecx
+ lea -48(%ecx), %ecx
+ mov %edx, %eax
+ jae L(shr_15_gobble)
+
+ movdqa 16(%esi), %xmm1
+ movdqa %xmm1, %xmm2
+ palignr $15, (%esi), %xmm1
+ pcmpeqb (%edi), %xmm1
+
+ movdqa 32(%esi), %xmm3
+ palignr $15, %xmm2, %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+ pand %xmm1, %xmm3
+ pmovmskb %xmm3, %edx
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+ lea (%ecx, %edi,1), %eax
+ lea 15(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shr_15_gobble):
+ sub $32, %ecx
+ movdqa 16(%esi), %xmm0
+ palignr $15, (%esi), %xmm0
+ pcmpeqb (%edi), %xmm0
+
+ movdqa 32(%esi), %xmm3
+ palignr $15, 16(%esi), %xmm3
+ pcmpeqb 16(%edi), %xmm3
+
+L(shr_15_gobble_loop):
+ pand %xmm0, %xmm3
+ sub $32, %ecx
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+
+ movdqa 64(%esi), %xmm3
+ palignr $15,48(%esi), %xmm3
+ sbb $0xffff, %edx
+ movdqa 48(%esi), %xmm0
+ palignr $15,32(%esi), %xmm0
+ pcmpeqb 32(%edi), %xmm0
+ lea 32(%esi), %esi
+ pcmpeqb 48(%edi), %xmm3
+
+ lea 32(%edi), %edi
+ jz L(shr_15_gobble_loop)
+
+ cmp $0, %ecx
+ jge L(shr_15_gobble_next)
+ inc %edx
+ add $32, %ecx
+L(shr_15_gobble_next):
+ test %edx, %edx
+ jnz L(exit)
+
+ pmovmskb %xmm3, %edx
+ movdqa %xmm0, %xmm1
+ lea 32(%edi), %edi
+ lea 32(%esi), %esi
+ sub $0xffff, %edx
+ jnz L(exit)
+
+ lea (%ecx, %edi,1), %eax
+ lea 15(%ecx, %esi,1), %edx
+ POP (%edi)
+ POP (%esi)
+ jmp L(less48bytes)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(exit):
+ pmovmskb %xmm1, %ebx
+ sub $0xffff, %ebx
+ jz L(first16bytes)
+ lea -16(%esi), %esi
+ lea -16(%edi), %edi
+ mov %ebx, %edx
+L(first16bytes):
+ add %eax, %esi
+L(less16bytes):
+ test %dl, %dl
+ jz L(next_24_bytes)
+
+ test $0x01, %dl
+ jnz L(Byte16)
+
+ test $0x02, %dl
+ jnz L(Byte17)
+
+ test $0x04, %dl
+ jnz L(Byte18)
+
+ test $0x08, %dl
+ jnz L(Byte19)
+
+ test $0x10, %dl
+ jnz L(Byte20)
+
+ test $0x20, %dl
+ jnz L(Byte21)
+
+ test $0x40, %dl
+ jnz L(Byte22)
+L(Byte23):
+ movzbl -9(%edi), %eax
+ movzbl -9(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte16):
+ movzbl -16(%edi), %eax
+ movzbl -16(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte17):
+ movzbl -15(%edi), %eax
+ movzbl -15(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte18):
+ movzbl -14(%edi), %eax
+ movzbl -14(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte19):
+ movzbl -13(%edi), %eax
+ movzbl -13(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte20):
+ movzbl -12(%edi), %eax
+ movzbl -12(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte21):
+ movzbl -11(%edi), %eax
+ movzbl -11(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(Byte22):
+ movzbl -10(%edi), %eax
+ movzbl -10(%esi), %edx
+ sub %edx, %eax
+ RETURN
+
+ ALIGN (4)
+L(next_24_bytes):
+ lea 8(%edi), %edi
+ lea 8(%esi), %esi
+ test $0x01, %dh
+ jnz L(Byte16)
+
+ test $0x02, %dh
+ jnz L(Byte17)
+
+ test $0x04, %dh
+ jnz L(Byte18)
+
+ test $0x08, %dh
+ jnz L(Byte19)
+
+ test $0x10, %dh
+ jnz L(Byte20)
+
+ test $0x20, %dh
+ jnz L(Byte21)
+
+ test $0x40, %dh
+ jnz L(Byte22)
+
+ ALIGN (4)
+L(Byte31):
+ movzbl -9(%edi), %eax
+ movzbl -9(%esi), %edx
+ sub %edx, %eax
+ RETURN_END
+
+ CFI_PUSH (%ebx)
+ ALIGN (4)
+L(more8bytes):
+ cmp $16, %ecx
+ jae L(more16bytes)
+ cmp $8, %ecx
+ je L(8bytes)
+ cmp $9, %ecx
+ je L(9bytes)
+ cmp $10, %ecx
+ je L(10bytes)
+ cmp $11, %ecx
+ je L(11bytes)
+ cmp $12, %ecx
+ je L(12bytes)
+ cmp $13, %ecx
+ je L(13bytes)
+ cmp $14, %ecx
+ je L(14bytes)
+ jmp L(15bytes)
+
+ ALIGN (4)
+L(more16bytes):
+ cmp $24, %ecx
+ jae L(more24bytes)
+ cmp $16, %ecx
+ je L(16bytes)
+ cmp $17, %ecx
+ je L(17bytes)
+ cmp $18, %ecx
+ je L(18bytes)
+ cmp $19, %ecx
+ je L(19bytes)
+ cmp $20, %ecx
+ je L(20bytes)
+ cmp $21, %ecx
+ je L(21bytes)
+ cmp $22, %ecx
+ je L(22bytes)
+ jmp L(23bytes)
+
+ ALIGN (4)
+L(more24bytes):
+ cmp $32, %ecx
+ jae L(more32bytes)
+ cmp $24, %ecx
+ je L(24bytes)
+ cmp $25, %ecx
+ je L(25bytes)
+ cmp $26, %ecx
+ je L(26bytes)
+ cmp $27, %ecx
+ je L(27bytes)
+ cmp $28, %ecx
+ je L(28bytes)
+ cmp $29, %ecx
+ je L(29bytes)
+ cmp $30, %ecx
+ je L(30bytes)
+ jmp L(31bytes)
+
+ ALIGN (4)
+L(more32bytes):
+ cmp $40, %ecx
+ jae L(more40bytes)
+ cmp $32, %ecx
+ je L(32bytes)
+ cmp $33, %ecx
+ je L(33bytes)
+ cmp $34, %ecx
+ je L(34bytes)
+ cmp $35, %ecx
+ je L(35bytes)
+ cmp $36, %ecx
+ je L(36bytes)
+ cmp $37, %ecx
+ je L(37bytes)
+ cmp $38, %ecx
+ je L(38bytes)
+ jmp L(39bytes)
+
+ ALIGN (4)
+L(more40bytes):
+ cmp $40, %ecx
+ je L(40bytes)
+ cmp $41, %ecx
+ je L(41bytes)
+ cmp $42, %ecx
+ je L(42bytes)
+ cmp $43, %ecx
+ je L(43bytes)
+ cmp $44, %ecx
+ je L(44bytes)
+ cmp $45, %ecx
+ je L(45bytes)
+ cmp $46, %ecx
+ je L(46bytes)
+ jmp L(47bytes)
+
+ ALIGN (4)
+L(less48bytes):
+ cmp $8, %ecx
+ jae L(more8bytes)
+ cmp $2, %ecx
+ je L(2bytes)
+ cmp $3, %ecx
+ je L(3bytes)
+ cmp $4, %ecx
+ je L(4bytes)
+ cmp $5, %ecx
+ je L(5bytes)
+ cmp $6, %ecx
+ je L(6bytes)
+ jmp L(7bytes)
+
+ ALIGN (4)
+L(44bytes):
+ mov -44(%eax), %ecx
+ mov -44(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(40bytes):
+ mov -40(%eax), %ecx
+ mov -40(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(36bytes):
+ mov -36(%eax), %ecx
+ mov -36(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(32bytes):
+ mov -32(%eax), %ecx
+ mov -32(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(28bytes):
+ mov -28(%eax), %ecx
+ mov -28(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(24bytes):
+ mov -24(%eax), %ecx
+ mov -24(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(20bytes):
+ mov -20(%eax), %ecx
+ mov -20(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(16bytes):
+ mov -16(%eax), %ecx
+ mov -16(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(12bytes):
+ mov -12(%eax), %ecx
+ mov -12(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(8bytes):
+ mov -8(%eax), %ecx
+ mov -8(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(4bytes):
+ mov -4(%eax), %ecx
+ mov -4(%edx), %ebx
+ cmp %ebx, %ecx
+ mov $0, %eax
+ jne L(find_diff)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(45bytes):
+ mov -45(%eax), %ecx
+ mov -45(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(41bytes):
+ mov -41(%eax), %ecx
+ mov -41(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(37bytes):
+ mov -37(%eax), %ecx
+ mov -37(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(33bytes):
+ mov -33(%eax), %ecx
+ mov -33(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(29bytes):
+ mov -29(%eax), %ecx
+ mov -29(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(25bytes):
+ mov -25(%eax), %ecx
+ mov -25(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(21bytes):
+ mov -21(%eax), %ecx
+ mov -21(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(17bytes):
+ mov -17(%eax), %ecx
+ mov -17(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(13bytes):
+ mov -13(%eax), %ecx
+ mov -13(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(9bytes):
+ mov -9(%eax), %ecx
+ mov -9(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(5bytes):
+ mov -5(%eax), %ecx
+ mov -5(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+ movzbl -1(%eax), %ecx
+ cmp -1(%edx), %cl
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(46bytes):
+ mov -46(%eax), %ecx
+ mov -46(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(42bytes):
+ mov -42(%eax), %ecx
+ mov -42(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(38bytes):
+ mov -38(%eax), %ecx
+ mov -38(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(34bytes):
+ mov -34(%eax), %ecx
+ mov -34(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(30bytes):
+ mov -30(%eax), %ecx
+ mov -30(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(26bytes):
+ mov -26(%eax), %ecx
+ mov -26(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(22bytes):
+ mov -22(%eax), %ecx
+ mov -22(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(18bytes):
+ mov -18(%eax), %ecx
+ mov -18(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(14bytes):
+ mov -14(%eax), %ecx
+ mov -14(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(10bytes):
+ mov -10(%eax), %ecx
+ mov -10(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(6bytes):
+ mov -6(%eax), %ecx
+ mov -6(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(2bytes):
+ movzwl -2(%eax), %ecx
+ movzwl -2(%edx), %ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bh, %ch
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(47bytes):
+ movl -47(%eax), %ecx
+ movl -47(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(43bytes):
+ movl -43(%eax), %ecx
+ movl -43(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(39bytes):
+ movl -39(%eax), %ecx
+ movl -39(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(35bytes):
+ movl -35(%eax), %ecx
+ movl -35(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(31bytes):
+ movl -31(%eax), %ecx
+ movl -31(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(27bytes):
+ movl -27(%eax), %ecx
+ movl -27(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(23bytes):
+ movl -23(%eax), %ecx
+ movl -23(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(19bytes):
+ movl -19(%eax), %ecx
+ movl -19(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(15bytes):
+ movl -15(%eax), %ecx
+ movl -15(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(11bytes):
+ movl -11(%eax), %ecx
+ movl -11(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(7bytes):
+ movl -7(%eax), %ecx
+ movl -7(%edx), %ebx
+ cmp %ebx, %ecx
+ jne L(find_diff)
+L(3bytes):
+ movzwl -3(%eax), %ecx
+ movzwl -3(%edx), %ebx
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ movzbl -1(%eax), %eax
+ cmpb -1(%edx), %al
+ mov $0, %eax
+ jne L(end)
+ POP (%ebx)
+ ret
+ CFI_PUSH (%ebx)
+
+ ALIGN (4)
+L(find_diff):
+ cmpb %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+ jne L(end)
+ shr $16,%ecx
+ shr $16,%ebx
+ cmp %bl, %cl
+ jne L(end)
+ cmp %bx, %cx
+L(end):
+ POP (%ebx)
+ mov $1, %eax
+ ja L(bigger)
+ neg %eax
+L(bigger):
+ ret
+
+END (MEMCMP)
+
+#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/memcmp.S b/libc/sysdeps/i386/i686/multiarch/memcmp.S
new file mode 100644
index 000000000..cf606a595
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/memcmp.S
@@ -0,0 +1,88 @@
+/* Multiple versions of memcmp
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in libc. */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+ .text
+ENTRY(memcmp)
+ .type memcmp, @gnu_indirect_function
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features
+1: leal __memcmp_ia32@GOTOFF(%ebx), %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __memcmp_ssse3@GOTOFF(%ebx), %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __memcmp_sse4_2@GOTOFF(%ebx), %eax
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ ret
+END(memcmp)
+# else
+ .text
+ENTRY(memcmp)
+ .type memcmp, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal __memcmp_ia32, %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+ jz 2f
+ leal __memcmp_ssse3, %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
+ jz 2f
+ leal __memcmp_sse4_2, %eax
+2: ret
+END(memcmp)
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __memcmp_ia32, @function; \
+ .p2align 4; \
+ __memcmp_ia32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __memcmp_ia32, .-__memcmp_ia32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+ they will be called without setting up EBX needed for PLT which is
+ used by IFUNC. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memcmp; __GI_memcmp = __memcmp_ia32
+# endif
+#endif
+
+#include "../memcmp.S"
diff --git a/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
index b26037d27..48a109ccd 100644
--- a/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
+++ b/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
@@ -127,10 +127,8 @@ ENTRY (MEMCPY)
cmp %eax, %edx
jb L(copy_forward)
je L(fwd_write_0bytes)
- cmp $32, %ecx
- jge L(memmove_bwd)
- jmp L(bk_write_less32bytes_2)
-L(memmove_bwd):
+ cmp $48, %ecx
+ jb L(bk_write_less48bytes)
add %ecx, %eax
cmp %eax, %edx
movl SRC(%esp), %eax
@@ -139,12 +137,12 @@ L(memmove_bwd):
L(copy_forward):
#endif
cmp $48, %ecx
- jge L(48bytesormore)
+ jae L(48bytesormore)
L(fwd_write_less32bytes):
#ifndef USE_AS_MEMMOVE
cmp %dl, %al
- jl L(bk_write)
+ jb L(bk_write)
#endif
add %ecx, %edx
add %ecx, %eax
@@ -162,6 +160,7 @@ L(48bytesormore):
movl %edx, %edi
and $-16, %edx
PUSH (%esi)
+ cfi_remember_state
add $16, %edx
movl %edi, %esi
sub %edx, %edi
@@ -181,7 +180,7 @@ L(48bytesormore):
#endif
mov %eax, %edi
- jge L(large_page)
+ jae L(large_page)
and $0xf, %edi
jz L(shl_0)
@@ -201,7 +200,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -209,7 +208,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -217,7 +216,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -234,6 +233,8 @@ L(shl_0_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
L(shl_0_gobble):
#ifdef DATA_CACHE_SIZE_HALF
@@ -251,8 +252,8 @@ L(shl_0_gobble):
shr $3, %esi
sub %esi, %edi
cmp %edi, %ecx
- jge L(shl_0_gobble_mem_start)
- lea -128(%ecx), %ecx
+ jae L(shl_0_gobble_mem_start)
+ sub $128, %ecx
ALIGN (4)
L(shl_0_gobble_cache_loop):
movdqa (%eax), %xmm0
@@ -275,11 +276,10 @@ L(shl_0_gobble_cache_loop):
movaps %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
- jge L(shl_0_gobble_cache_loop)
-L(shl_0_gobble_cache_loop_tail):
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_cache_less_64bytes)
+ jae L(shl_0_gobble_cache_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_cache_less_64bytes)
movdqa (%eax), %xmm0
sub $0x40, %ecx
@@ -297,7 +297,7 @@ L(shl_0_gobble_cache_loop_tail):
add $0x40, %edx
L(shl_0_cache_less_64bytes):
cmp $0x20, %ecx
- jl L(shl_0_cache_less_32bytes)
+ jb L(shl_0_cache_less_32bytes)
movdqa (%eax), %xmm0
sub $0x20, %ecx
movdqa 0x10(%eax), %xmm1
@@ -307,7 +307,7 @@ L(shl_0_cache_less_64bytes):
add $0x20, %edx
L(shl_0_cache_less_32bytes):
cmp $0x10, %ecx
- jl L(shl_0_cache_less_16bytes)
+ jb L(shl_0_cache_less_16bytes)
sub $0x10, %ecx
movdqa (%eax), %xmm0
add $0x10, %eax
@@ -320,12 +320,13 @@ L(shl_0_cache_less_16bytes):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_0_gobble_mem_start):
cmp %al, %dl
je L(copy_page_by_rep)
- lea -128(%ecx), %ecx
+ sub $128, %ecx
L(shl_0_gobble_mem_loop):
prefetchnta 0x1c0(%eax)
prefetchnta 0x280(%eax)
@@ -352,10 +353,10 @@ L(shl_0_gobble_mem_loop):
movaps %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
- jge L(shl_0_gobble_mem_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_mem_less_64bytes)
+ jae L(shl_0_gobble_mem_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_mem_less_64bytes)
movdqa (%eax), %xmm0
sub $0x40, %ecx
@@ -373,7 +374,7 @@ L(shl_0_gobble_mem_loop):
add $0x40, %edx
L(shl_0_mem_less_64bytes):
cmp $0x20, %ecx
- jl L(shl_0_mem_less_32bytes)
+ jb L(shl_0_mem_less_32bytes)
movdqa (%eax), %xmm0
sub $0x20, %ecx
movdqa 0x10(%eax), %xmm1
@@ -383,7 +384,7 @@ L(shl_0_mem_less_64bytes):
add $0x20, %edx
L(shl_0_mem_less_32bytes):
cmp $0x10, %ecx
- jl L(shl_0_mem_less_16bytes)
+ jb L(shl_0_mem_less_16bytes)
sub $0x10, %ecx
movdqa (%eax), %xmm0
add $0x10, %eax
@@ -396,14 +397,15 @@ L(shl_0_mem_less_16bytes):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_1):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -1(%eax), %eax
+ sub $1, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_1_loop):
@@ -418,7 +420,7 @@ L(shl_1_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_1_end)
+ jb L(shl_1_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -433,20 +435,22 @@ L(shl_1_loop):
jae L(shl_1_loop)
L(shl_1_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 1(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_2):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -2(%eax), %eax
+ sub $2, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_2_loop):
@@ -461,7 +465,7 @@ L(shl_2_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_2_end)
+ jb L(shl_2_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -476,20 +480,22 @@ L(shl_2_loop):
jae L(shl_2_loop)
L(shl_2_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 2(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_3):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -3(%eax), %eax
+ sub $3, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_3_loop):
@@ -504,7 +510,7 @@ L(shl_3_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_3_end)
+ jb L(shl_3_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -519,20 +525,22 @@ L(shl_3_loop):
jae L(shl_3_loop)
L(shl_3_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 3(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_4):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -4(%eax), %eax
+ sub $4, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_4_loop):
@@ -547,7 +555,7 @@ L(shl_4_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_4_end)
+ jb L(shl_4_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -562,20 +570,22 @@ L(shl_4_loop):
jae L(shl_4_loop)
L(shl_4_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 4(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_5):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -5(%eax), %eax
+ sub $5, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_5_loop):
@@ -590,7 +600,7 @@ L(shl_5_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_5_end)
+ jb L(shl_5_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -605,21 +615,22 @@ L(shl_5_loop):
jae L(shl_5_loop)
L(shl_5_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 5(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_6):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -6(%eax), %eax
+ sub $6, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_6_loop):
@@ -634,7 +645,7 @@ L(shl_6_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_6_end)
+ jb L(shl_6_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -649,20 +660,22 @@ L(shl_6_loop):
jae L(shl_6_loop)
L(shl_6_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 6(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_7):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -7(%eax), %eax
+ sub $7, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_7_loop):
@@ -677,7 +690,7 @@ L(shl_7_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_7_end)
+ jb L(shl_7_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -692,20 +705,22 @@ L(shl_7_loop):
jae L(shl_7_loop)
L(shl_7_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 7(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_8):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -8(%eax), %eax
+ sub $8, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_8_loop):
@@ -720,7 +735,7 @@ L(shl_8_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_8_end)
+ jb L(shl_8_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -735,20 +750,22 @@ L(shl_8_loop):
jae L(shl_8_loop)
L(shl_8_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 8(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_9):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -9(%eax), %eax
+ sub $9, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_9_loop):
@@ -763,7 +780,7 @@ L(shl_9_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_9_end)
+ jb L(shl_9_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -778,20 +795,22 @@ L(shl_9_loop):
jae L(shl_9_loop)
L(shl_9_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 9(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_10):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -10(%eax), %eax
+ sub $10, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_10_loop):
@@ -806,7 +825,7 @@ L(shl_10_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_10_end)
+ jb L(shl_10_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -821,20 +840,22 @@ L(shl_10_loop):
jae L(shl_10_loop)
L(shl_10_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 10(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_11):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -11(%eax), %eax
+ sub $11, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_11_loop):
@@ -849,7 +870,7 @@ L(shl_11_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_11_end)
+ jb L(shl_11_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -864,20 +885,22 @@ L(shl_11_loop):
jae L(shl_11_loop)
L(shl_11_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 11(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_12):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -12(%eax), %eax
+ sub $12, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_12_loop):
@@ -892,7 +915,7 @@ L(shl_12_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_12_end)
+ jb L(shl_12_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -907,20 +930,22 @@ L(shl_12_loop):
jae L(shl_12_loop)
L(shl_12_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 12(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_13):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -13(%eax), %eax
+ sub $13, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_13_loop):
@@ -935,7 +960,7 @@ L(shl_13_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_13_end)
+ jb L(shl_13_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -950,20 +975,22 @@ L(shl_13_loop):
jae L(shl_13_loop)
L(shl_13_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 13(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_14):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -14(%eax), %eax
+ sub $14, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_14_loop):
@@ -978,7 +1005,7 @@ L(shl_14_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_14_end)
+ jb L(shl_14_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -993,21 +1020,22 @@ L(shl_14_loop):
jae L(shl_14_loop)
L(shl_14_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 14(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_15):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -15(%eax), %eax
+ sub $15, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_15_loop):
@@ -1022,7 +1050,7 @@ L(shl_15_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_15_end)
+ jb L(shl_15_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -1037,7 +1065,7 @@ L(shl_15_loop):
jae L(shl_15_loop)
L(shl_15_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 15(%edi, %eax), %eax
@@ -1241,20 +1269,23 @@ L(fwd_write_3bytes):
movl DEST(%esp), %eax
# endif
#endif
- RETURN
+ RETURN_END
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(large_page):
movdqu (%eax), %xmm1
- lea 16(%eax), %eax
movdqu %xmm0, (%esi)
movntdq %xmm1, (%edx)
- lea 16(%edx), %edx
+ add $0x10, %eax
+ add $0x10, %edx
+ sub $0x10, %ecx
cmp %al, %dl
je L(copy_page_by_rep)
L(large_page_loop_init):
POP (%esi)
- lea -0x90(%ecx), %ecx
+ sub $0x80, %ecx
POP (%edi)
L(large_page_loop):
prefetchnta 0x1c0(%eax)
@@ -1280,9 +1311,9 @@ L(large_page_loop):
movntdq %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
jae L(large_page_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(large_page_less_64bytes)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(large_page_less_64bytes)
movdqu (%eax), %xmm0
movdqu 0x10(%eax), %xmm1
@@ -1298,7 +1329,7 @@ L(large_page_loop):
sub $0x40, %ecx
L(large_page_less_64bytes):
cmp $32, %ecx
- jl L(large_page_less_32bytes)
+ jb L(large_page_less_32bytes)
movdqu (%eax), %xmm0
movdqu 0x10(%eax), %xmm1
lea 0x20(%eax), %eax
@@ -1312,6 +1343,8 @@ L(large_page_less_32bytes):
sfence
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(copy_page_by_rep):
mov %eax, %esi
@@ -1658,18 +1691,18 @@ L(table_48_bytes_bwd):
L(copy_backward):
PUSH (%esi)
movl %eax, %esi
- lea (%ecx,%edx,1),%edx
- lea (%ecx,%esi,1),%esi
+ add %ecx, %edx
+ add %ecx, %esi
testl $0x3, %edx
jnz L(bk_align)
L(bk_aligned_4):
cmp $64, %ecx
- jge L(bk_write_more64bytes)
+ jae L(bk_write_more64bytes)
L(bk_write_64bytesless):
cmp $32, %ecx
- jl L(bk_write_less32bytes)
+ jb L(bk_write_less32bytes)
L(bk_write_more32bytes):
/* Copy 32 bytes at a time. */
@@ -1698,13 +1731,14 @@ L(bk_write_less32bytes):
sub %ecx, %edx
sub %ecx, %eax
POP (%esi)
-L(bk_write_less32bytes_2):
+L(bk_write_less48bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+ CFI_PUSH (%esi)
ALIGN (4)
L(bk_align):
cmp $8, %ecx
- jle L(bk_write_less32bytes)
+ jbe L(bk_write_less32bytes)
testl $1, %edx
/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
then (EDX & 2) must be != 0. */
@@ -1760,7 +1794,7 @@ L(bk_ssse3_align):
L(bk_ssse3_cpy_pre):
cmp $64, %ecx
- jl L(bk_write_more32bytes)
+ jb L(bk_write_more32bytes)
L(bk_ssse3_cpy):
sub $64, %esi
@@ -1775,7 +1809,7 @@ L(bk_ssse3_cpy):
movdqu (%esi), %xmm0
movdqa %xmm0, (%edx)
cmp $64, %ecx
- jge L(bk_ssse3_cpy)
+ jae L(bk_ssse3_cpy)
jmp L(bk_write_64bytesless)
#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
index 749c82d37..ec9eeb95e 100644
--- a/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
+++ b/libc/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
@@ -128,7 +128,7 @@ ENTRY (MEMCPY)
jb L(copy_forward)
je L(fwd_write_0bytes)
cmp $32, %ecx
- jge L(memmove_bwd)
+ jae L(memmove_bwd)
jmp L(bk_write_less32bytes_2)
L(memmove_bwd):
add %ecx, %eax
@@ -139,12 +139,12 @@ L(memmove_bwd):
L(copy_forward):
#endif
cmp $48, %ecx
- jge L(48bytesormore)
+ jae L(48bytesormore)
L(fwd_write_less32bytes):
#ifndef USE_AS_MEMMOVE
cmp %dl, %al
- jl L(bk_write)
+ jb L(bk_write)
#endif
add %ecx, %edx
add %ecx, %eax
@@ -162,6 +162,7 @@ L(48bytesormore):
movl %edx, %edi
and $-16, %edx
PUSH (%esi)
+ cfi_remember_state
add $16, %edx
movl %edi, %esi
sub %edx, %edi
@@ -181,12 +182,14 @@ L(48bytesormore):
#endif
mov %eax, %edi
- jge L(large_page)
+ jae L(large_page)
and $0xf, %edi
jz L(shl_0)
BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_0):
movdqu %xmm0, (%esi)
@@ -202,7 +205,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -210,7 +213,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -218,7 +221,7 @@ L(shl_0_loop):
movdqa %xmm0, (%edx, %edi)
movdqa %xmm1, 16(%edx, %edi)
lea 32(%edi), %edi
- jl L(shl_0_end)
+ jb L(shl_0_end)
movdqa (%eax, %edi), %xmm0
movdqa 16(%eax, %edi), %xmm1
@@ -234,6 +237,7 @@ L(shl_0_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+ CFI_PUSH (%edi)
L(shl_0_gobble):
#ifdef DATA_CACHE_SIZE_HALF
@@ -250,7 +254,7 @@ L(shl_0_gobble):
POP (%edi)
lea -128(%ecx), %ecx
- jge L(shl_0_gobble_mem_loop)
+ jae L(shl_0_gobble_mem_loop)
L(shl_0_gobble_cache_loop):
movdqa (%eax), %xmm0
movdqa 0x10(%eax), %xmm1
@@ -272,8 +276,7 @@ L(shl_0_gobble_cache_loop):
movdqa %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
- jge L(shl_0_gobble_cache_loop)
-L(shl_0_gobble_cache_loop_tail):
+ jae L(shl_0_gobble_cache_loop)
cmp $-0x40, %ecx
lea 0x80(%ecx), %ecx
jl L(shl_0_cache_less_64bytes)
@@ -294,7 +297,7 @@ L(shl_0_gobble_cache_loop_tail):
add $0x40, %edx
L(shl_0_cache_less_64bytes):
cmp $0x20, %ecx
- jl L(shl_0_cache_less_32bytes)
+ jb L(shl_0_cache_less_32bytes)
movdqa (%eax), %xmm0
sub $0x20, %ecx
movdqa 0x10(%eax), %xmm1
@@ -304,7 +307,7 @@ L(shl_0_cache_less_64bytes):
add $0x20, %edx
L(shl_0_cache_less_32bytes):
cmp $0x10, %ecx
- jl L(shl_0_cache_less_16bytes)
+ jb L(shl_0_cache_less_16bytes)
sub $0x10, %ecx
movdqa (%eax), %xmm0
add $0x10, %eax
@@ -342,7 +345,7 @@ L(shl_0_gobble_mem_loop):
movdqa %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
- jge L(shl_0_gobble_mem_loop)
+ jae L(shl_0_gobble_mem_loop)
cmp $-0x40, %ecx
lea 0x80(%ecx), %ecx
jl L(shl_0_mem_less_64bytes)
@@ -363,7 +366,7 @@ L(shl_0_gobble_mem_loop):
add $0x40, %edx
L(shl_0_mem_less_64bytes):
cmp $0x20, %ecx
- jl L(shl_0_mem_less_32bytes)
+ jb L(shl_0_mem_less_32bytes)
movdqa (%eax), %xmm0
sub $0x20, %ecx
movdqa 0x10(%eax), %xmm1
@@ -373,7 +376,7 @@ L(shl_0_mem_less_64bytes):
add $0x20, %edx
L(shl_0_mem_less_32bytes):
cmp $0x10, %ecx
- jl L(shl_0_mem_less_16bytes)
+ jb L(shl_0_mem_less_16bytes)
sub $0x10, %ecx
movdqa (%eax), %xmm0
add $0x10, %eax
@@ -384,7 +387,8 @@ L(shl_0_mem_less_16bytes):
add %ecx, %eax
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_1):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -406,7 +410,7 @@ L(shl_1_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_1_end)
+ jb L(shl_1_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -428,6 +432,8 @@ L(shl_1_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_2):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -449,7 +455,7 @@ L(shl_2_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_2_end)
+ jb L(shl_2_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -471,6 +477,8 @@ L(shl_2_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_3):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -492,7 +500,7 @@ L(shl_3_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_3_end)
+ jb L(shl_3_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -514,6 +522,8 @@ L(shl_3_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_4):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -535,7 +545,7 @@ L(shl_4_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_4_end)
+ jb L(shl_4_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -557,6 +567,8 @@ L(shl_4_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_5):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -578,7 +590,7 @@ L(shl_5_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_5_end)
+ jb L(shl_5_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -600,7 +612,8 @@ L(shl_5_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_6):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -622,7 +635,7 @@ L(shl_6_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_6_end)
+ jb L(shl_6_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -644,6 +657,8 @@ L(shl_6_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_7):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -665,7 +680,7 @@ L(shl_7_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_7_end)
+ jb L(shl_7_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -687,6 +702,8 @@ L(shl_7_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_8):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -708,7 +725,7 @@ L(shl_8_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_8_end)
+ jb L(shl_8_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -730,6 +747,8 @@ L(shl_8_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_9):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -751,7 +770,7 @@ L(shl_9_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_9_end)
+ jb L(shl_9_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -773,6 +792,8 @@ L(shl_9_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_10):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -794,7 +815,7 @@ L(shl_10_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_10_end)
+ jb L(shl_10_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -816,6 +837,8 @@ L(shl_10_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_11):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -837,7 +860,7 @@ L(shl_11_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_11_end)
+ jb L(shl_11_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -859,6 +882,8 @@ L(shl_11_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_12):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -880,7 +905,7 @@ L(shl_12_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_12_end)
+ jb L(shl_12_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -902,6 +927,8 @@ L(shl_12_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_13):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -923,7 +950,7 @@ L(shl_13_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_13_end)
+ jb L(shl_13_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -945,6 +972,8 @@ L(shl_13_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_14):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -966,7 +995,7 @@ L(shl_14_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_14_end)
+ jb L(shl_14_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -988,7 +1017,8 @@ L(shl_14_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_15):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -1010,7 +1040,7 @@ L(shl_15_loop):
movdqa %xmm2, -32(%edx, %edi)
movdqa %xmm3, -16(%edx, %edi)
- jl L(shl_15_end)
+ jb L(shl_15_end)
movdqa 16(%eax, %edi), %xmm2
sub $32, %ecx
@@ -1229,8 +1259,10 @@ L(fwd_write_3bytes):
movl DEST(%esp), %eax
# endif
#endif
- RETURN
+ RETURN_END
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(large_page):
movdqu (%eax), %xmm1
@@ -1281,7 +1313,7 @@ L(large_page_loop):
sub $0x40, %ecx
L(large_page_less_64bytes):
cmp $32, %ecx
- jl L(large_page_less_32bytes)
+ jb L(large_page_less_32bytes)
movdqu (%eax), %xmm0
movdqu 0x10(%eax), %xmm1
lea 0x20(%eax), %eax
@@ -1617,11 +1649,11 @@ L(copy_backward):
L(bk_aligned_4):
cmp $64, %ecx
- jge L(bk_write_more64bytes)
+ jae L(bk_write_more64bytes)
L(bk_write_64bytesless):
cmp $32, %ecx
- jl L(bk_write_less32bytes)
+ jb L(bk_write_less32bytes)
L(bk_write_more32bytes):
/* Copy 32 bytes at a time. */
@@ -1653,10 +1685,11 @@ L(bk_write_less32bytes):
L(bk_write_less32bytes_2):
BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+ CFI_PUSH (%esi)
ALIGN (4)
L(bk_align):
cmp $8, %ecx
- jle L(bk_write_less32bytes)
+ jbe L(bk_write_less32bytes)
testl $1, %edx
/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
then (EDX & 2) must be != 0. */
@@ -1712,7 +1745,7 @@ L(bk_ssse3_align):
L(bk_ssse3_cpy_pre):
cmp $64, %ecx
- jl L(bk_write_more32bytes)
+ jb L(bk_write_more32bytes)
L(bk_ssse3_cpy):
sub $64, %esi
@@ -1727,7 +1760,7 @@ L(bk_ssse3_cpy):
movdqu (%esi), %xmm0
movdqa %xmm0, (%edx)
cmp $64, %ecx
- jge L(bk_ssse3_cpy)
+ jae L(bk_ssse3_cpy)
jmp L(bk_write_64bytesless)
#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
index 84afffeb6..f9a0b13d0 100644
--- a/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ b/libc/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -243,7 +243,6 @@ L(32bytesormore):
pxor %xmm0, %xmm0
#else
movd %eax, %xmm0
- punpcklbw %xmm0, %xmm0
pshufd $0, %xmm0, %xmm0
#endif
testl $0xf, %edx
@@ -261,7 +260,7 @@ L(not_aligned_16):
ALIGN (4)
L(aligned_16):
cmp $128, %ecx
- jge L(128bytesormore)
+ jae L(128bytesormore)
L(aligned_16_less128bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
@@ -293,7 +292,7 @@ L(128bytesormore):
* fast string will prefetch and combine data efficiently.
*/
cmp %edi, %ecx
- jae L(128bytesormore_nt)
+ jae L(128bytesormore_endof_L1)
subl $128, %ecx
L(128bytesormore_normal):
sub $128, %ecx
@@ -306,7 +305,7 @@ L(128bytesormore_normal):
movdqa %xmm0, 0x60(%edx)
movdqa %xmm0, 0x70(%edx)
lea 128(%edx), %edx
- jl L(128bytesless_normal)
+ jb L(128bytesless_normal)
sub $128, %ecx
@@ -319,15 +318,16 @@ L(128bytesormore_normal):
movdqa %xmm0, 0x60(%edx)
movdqa %xmm0, 0x70(%edx)
lea 128(%edx), %edx
- jge L(128bytesormore_normal)
+ jae L(128bytesormore_normal)
L(128bytesless_normal):
POP (%edi)
- lea 128(%ecx), %ecx
+ add $128, %ecx
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+ CFI_PUSH (%edi)
ALIGN (4)
-L(128bytesormore_nt):
+L(128bytesormore_endof_L1):
mov %edx, %edi
mov %ecx, %edx
shr $2, %ecx
diff --git a/libc/sysdeps/i386/i686/multiarch/memset-sse2.S b/libc/sysdeps/i386/i686/multiarch/memset-sse2.S
index b2b979193..92ad601bf 100644
--- a/libc/sysdeps/i386/i686/multiarch/memset-sse2.S
+++ b/libc/sysdeps/i386/i686/multiarch/memset-sse2.S
@@ -243,7 +243,6 @@ L(32bytesormore):
pxor %xmm0, %xmm0
#else
movd %eax, %xmm0
- punpcklbw %xmm0, %xmm0
pshufd $0, %xmm0, %xmm0
#endif
testl $0xf, %edx
@@ -261,7 +260,7 @@ L(not_aligned_16):
ALIGN (4)
L(aligned_16):
cmp $128, %ecx
- jge L(128bytesormore)
+ jae L(128bytesormore)
L(aligned_16_less128bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
@@ -287,14 +286,17 @@ L(128bytesormore):
#ifdef DATA_CACHE_SIZE
POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
cmp $DATA_CACHE_SIZE, %ecx
#else
# ifdef SHARED
+# define RESTORE_EBX_STATE
call __i686.get_pc_thunk.bx
add $_GLOBAL_OFFSET_TABLE_, %ebx
cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
cmp __x86_data_cache_size, %ecx
# endif
#endif
@@ -312,7 +314,7 @@ L(128bytesormore_normal):
movdqa %xmm0, 0x60(%edx)
movdqa %xmm0, 0x70(%edx)
lea 128(%edx), %edx
- jl L(128bytesless_normal)
+ jb L(128bytesless_normal)
sub $128, %ecx
@@ -325,10 +327,10 @@ L(128bytesormore_normal):
movdqa %xmm0, 0x60(%edx)
movdqa %xmm0, 0x70(%edx)
lea 128(%edx), %edx
- jge L(128bytesormore_normal)
+ jae L(128bytesormore_normal)
L(128bytesless_normal):
- lea 128(%ecx), %ecx
+ add $128, %ecx
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
ALIGN (4)
@@ -346,11 +348,12 @@ L(128bytes_L2_normal):
movaps %xmm0, 0x70(%edx)
add $128, %edx
cmp $128, %ecx
- jge L(128bytes_L2_normal)
+ jae L(128bytes_L2_normal)
L(128bytesless_L2_normal):
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+ RESTORE_EBX_STATE
L(128bytesormore_nt_start):
sub %ebx, %ecx
ALIGN (4)
@@ -368,7 +371,7 @@ L(128bytesormore_shared_cache_loop):
movdqa %xmm0, 0x70(%edx)
add $0x80, %edx
cmp $0x80, %ebx
- jge L(128bytesormore_shared_cache_loop)
+ jae L(128bytesormore_shared_cache_loop)
cmp $0x80, %ecx
jb L(shared_cache_loop_end)
ALIGN (4)
@@ -384,7 +387,7 @@ L(128bytesormore_nt):
movntdq %xmm0, 0x70(%edx)
add $0x80, %edx
cmp $0x80, %ecx
- jge L(128bytesormore_nt)
+ jae L(128bytesormore_nt)
sfence
L(shared_cache_loop_end):
#if defined DATA_CACHE_SIZE || !defined SHARED
diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S
new file mode 100644
index 000000000..81d6ec66f
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -0,0 +1,380 @@
+/* strcmp with SSE4.2
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifndef USE_AS_STRNCMP
+# ifndef STRCMP
+# define STRCMP __strcmp_sse4_2
+# endif
+# define STR1 4
+# define STR2 STR1+4
+# define RETURN ret; .p2align 4
+#else
+# ifndef STRCMP
+# define STRCMP __strncmp_sse4_2
+# endif
+# define STR1 8
+# define STR2 STR1+4
+# define CNT STR2+4
+# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
+#endif
+
+ .section .text.sse4.2,"ax",@progbits
+ENTRY (STRCMP)
+#ifdef USE_AS_STRNCMP
+ PUSH (%ebp)
+#endif
+ mov STR1(%esp), %edx
+ mov STR2(%esp), %eax
+#ifdef USE_AS_STRNCMP
+ movl CNT(%esp), %ebp
+ test %ebp, %ebp
+ je L(eq)
+#endif
+ mov %dx, %cx
+ and $0xfff, %cx
+ cmp $0xff0, %cx
+ ja L(first4bytes)
+ movdqu (%edx), %xmm2
+ mov %eax, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(first4bytes)
+ movd %xmm2, %ecx
+ cmp (%eax), %ecx
+ jne L(less4bytes)
+ movdqu (%eax), %xmm1
+ pxor %xmm2, %xmm1
+ pxor %xmm0, %xmm0
+ ptest %xmm1, %xmm0
+ jnc L(less16bytes)
+ pcmpeqb %xmm0, %xmm2
+ ptest %xmm2, %xmm0
+ jnc L(less16bytes)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %ebp
+ jbe L(eq)
+#endif
+ add $16, %edx
+ add $16, %eax
+L(first4bytes):
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ je L(eq)
+#endif
+
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ je L(eq)
+#endif
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ je L(eq)
+#endif
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ je L(eq)
+#endif
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ je L(eq)
+#endif
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ je L(eq)
+#endif
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ je L(eq)
+#endif
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ sub $8, %ebp
+ je L(eq)
+#endif
+ add $8, %eax
+ add $8, %edx
+
+ PUSH (%ebx)
+ PUSH (%edi)
+ PUSH (%esi)
+#ifdef USE_AS_STRNCMP
+ cfi_remember_state
+#endif
+ mov %edx, %edi
+ mov %eax, %esi
+ xorl %eax, %eax
+L(check_offset):
+ movl %edi, %ebx
+ movl %esi, %ecx
+ andl $0xfff, %ebx
+ andl $0xfff, %ecx
+ cmpl %ebx, %ecx
+ cmovl %ebx, %ecx
+ lea -0xff0(%ecx), %edx
+ sub %edx, %edi
+ sub %edx, %esi
+ testl %edx, %edx
+ jg L(crosspage)
+L(loop):
+ movdqu (%esi,%edx), %xmm2
+ movdqu (%edi,%edx), %xmm1
+ pcmpistri $0x1a, %xmm2, %xmm1
+ jbe L(end)
+
+#ifdef USE_AS_STRNCMP
+ sub $16, %ebp
+ jbe L(more16byteseq)
+#endif
+
+ add $16, %edx
+ jle L(loop)
+L(crosspage):
+ movzbl (%edi,%edx), %eax
+ movzbl (%esi,%edx), %ebx
+ subl %ebx, %eax
+ jne L(ret)
+ testl %ebx, %ebx
+ je L(ret)
+#ifdef USE_AS_STRNCMP
+ sub $1, %ebp
+ jbe L(more16byteseq)
+#endif
+ inc %edx
+ cmp $15, %edx
+ jle L(crosspage)
+ add $16, %edi
+ add $16, %esi
+ jmp L(check_offset)
+
+ .p2align 4
+L(end):
+ jnc L(ret)
+#ifdef USE_AS_STRNCMP
+ sub %ecx, %ebp
+ jbe L(more16byteseq)
+#endif
+ lea (%ecx,%edx), %ebx
+ movzbl (%edi,%ebx), %eax
+ movzbl (%esi,%ebx), %ecx
+ subl %ecx, %eax
+L(ret):
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+#ifdef USE_AS_STRNCMP
+ POP (%ebp)
+#endif
+ ret
+
+ .p2align 4
+#ifdef USE_AS_STRNCMP
+ cfi_restore_state
+L(more16byteseq):
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+#endif
+L(eq):
+ xorl %eax, %eax
+ RETURN
+
+L(neq):
+ mov $1, %eax
+ ja L(neq_bigger)
+ neg %eax
+L(neq_bigger):
+ RETURN
+
+L(less16bytes):
+ add $0xfefefeff, %ecx
+ jnc L(less4bytes)
+ xor (%edx), %ecx
+ or $0xfefefeff, %ecx
+ add $1, %ecx
+ jnz L(less4bytes)
+
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ jbe L(eq)
+#endif
+ mov 4(%edx), %ecx
+ cmp 4(%eax), %ecx
+ jne L(more4bytes)
+ add $0xfefefeff, %ecx
+ jnc L(more4bytes)
+ xor 4(%edx), %ecx
+ or $0xfefefeff, %ecx
+ add $1, %ecx
+ jnz L(more4bytes)
+
+#ifdef USE_AS_STRNCMP
+ sub $8, %ebp
+ jbe L(eq)
+#endif
+
+ add $8, %edx
+ add $8, %eax
+L(less4bytes):
+
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ je L(eq)
+#endif
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ je L(eq)
+#endif
+
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ je L(eq)
+#endif
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+L(more4bytes):
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ je L(eq)
+#endif
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ je L(eq)
+#endif
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ je L(eq)
+#endif
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ je L(eq)
+#endif
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ jmp L(eq)
+
+END (STRCMP)
+
+#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
new file mode 100644
index 000000000..40994c05b
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S
@@ -0,0 +1,2220 @@
+/* strcmp with SSSE3
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifndef USE_AS_STRNCMP
+# ifndef STRCMP
+# define STRCMP __strcmp_ssse3
+# endif
+# define STR1 4
+# define STR2 STR1+4
+# define RETURN ret; .p2align 4
+# define UPDATE_STRNCMP_COUNTER
+#else
+# ifndef STRCMP
+# define STRCMP __strncmp_ssse3
+# endif
+# define STR1 8
+# define STR2 STR1+4
+# define CNT STR2+4
+# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
+# define UPDATE_STRNCMP_COUNTER \
+ /* calculate left number to compare */ \
+ mov $16, %esi; \
+ sub %ecx, %esi; \
+ cmp %esi, %ebp; \
+ jbe L(more8byteseq); \
+ sub %esi, %ebp
+#endif
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (STRCMP)
+#ifdef USE_AS_STRNCMP
+ PUSH (%ebp)
+#endif
+ movl STR1(%esp), %edx
+ movl STR2(%esp), %eax
+#ifdef USE_AS_STRNCMP
+ movl CNT(%esp), %ebp
+ cmp $16, %ebp
+ jb L(less16bytes_sncmp)
+ jmp L(more16bytes)
+#endif
+
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ cmpl $0, %ecx
+ je L(eq)
+
+ add $8, %edx
+ add $8, %eax
+#ifdef USE_AS_STRNCMP
+ cmp $8, %ebp
+ lea -8(%ebp), %ebp
+ je L(eq)
+L(more16bytes):
+#endif
+ movl %edx, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(crosspage)
+ mov %eax, %ecx
+ and $0xfff, %ecx
+ cmp $0xff0, %ecx
+ ja L(crosspage)
+ pxor %xmm0, %xmm0
+ movlpd (%eax), %xmm1
+ movlpd (%edx), %xmm2
+ movhpd 8(%eax), %xmm1
+ movhpd 8(%edx), %xmm2
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %ecx
+ sub $0xffff, %ecx
+ jnz L(less16bytes)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(eq)
+#endif
+ add $16, %eax
+ add $16, %edx
+
+L(crosspage):
+
+ PUSH (%ebx)
+ PUSH (%edi)
+ PUSH (%esi)
+#ifdef USE_AS_STRNCMP
+ cfi_remember_state
+#endif
+
+ movl %edx, %edi
+ movl %eax, %ecx
+ and $0xf, %ecx
+ and $0xf, %edi
+ xor %ecx, %eax
+ xor %edi, %edx
+ xor %ebx, %ebx
+ cmp %edi, %ecx
+ je L(ashr_0)
+ ja L(bigger)
+ or $0x20, %ebx
+ xchg %edx, %eax
+ xchg %ecx, %edi
+L(bigger):
+ lea 15(%edi), %edi
+ sub %ecx, %edi
+ cmp $8, %edi
+ jle L(ashr_less_8)
+ cmp $14, %edi
+ je L(ashr_15)
+ cmp $13, %edi
+ je L(ashr_14)
+ cmp $12, %edi
+ je L(ashr_13)
+ cmp $11, %edi
+ je L(ashr_12)
+ cmp $10, %edi
+ je L(ashr_11)
+ cmp $9, %edi
+ je L(ashr_10)
+L(ashr_less_8):
+ je L(ashr_9)
+ cmp $7, %edi
+ je L(ashr_8)
+ cmp $6, %edi
+ je L(ashr_7)
+ cmp $5, %edi
+ je L(ashr_6)
+ cmp $4, %edi
+ je L(ashr_5)
+ cmp $3, %edi
+ je L(ashr_4)
+ cmp $2, %edi
+ je L(ashr_3)
+ cmp $1, %edi
+ je L(ashr_2)
+ cmp $0, %edi
+ je L(ashr_1)
+
+/*
+ * The following cases will be handled by ashr_0
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(0~15) n(0~15) 15(15+ n-n) ashr_0
+ */
+ .p2align 4
+L(ashr_0):
+ mov $0xffff, %esi
+ movdqa (%eax), %xmm1
+ pxor %xmm0, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb (%edx), %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ mov %ecx, %edi
+ jne L(less32bytes)
+ UPDATE_STRNCMP_COUNTER
+ mov $0x10, %ebx
+ mov $0x10, %ecx
+ pxor %xmm0, %xmm0
+ .p2align 4
+L(loop_ashr_0):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ jmp L(loop_ashr_0)
+
+/*
+ * The following cases will be handled by ashr_1
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(15) n -15 0(15 +(n-15) - n) ashr_1
+ */
+ .p2align 4
+L(ashr_1):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $15, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -15(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $1, %ebx
+ lea 1(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_1):
+ add $16, %edi
+ jg L(nibble_ashr_1)
+
+L(gobble_ashr_1):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $1, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_1)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $1, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_1)
+
+ .p2align 4
+L(nibble_ashr_1):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfffe, %esi
+ jnz L(ashr_1_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $15, %ebp
+ jbe L(ashr_1_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_1)
+
+ .p2align 4
+L(ashr_1_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $1, %xmm0
+ psrldq $1, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_2
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
+ */
+ .p2align 4
+L(ashr_2):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $14, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -14(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $2, %ebx
+ lea 2(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_2):
+ add $16, %edi
+ jg L(nibble_ashr_2)
+
+L(gobble_ashr_2):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $2, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_2)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $2, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_2)
+
+ .p2align 4
+L(nibble_ashr_2):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfffc, %esi
+ jnz L(ashr_2_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $14, %ebp
+ jbe L(ashr_2_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_2)
+
+ .p2align 4
+L(ashr_2_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $2, %xmm0
+ psrldq $2, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_3
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
+ */
+ .p2align 4
+L(ashr_3):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $13, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -13(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $3, %ebx
+ lea 3(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_3):
+ add $16, %edi
+ jg L(nibble_ashr_3)
+
+L(gobble_ashr_3):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $3, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_3)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $3, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_3)
+
+ .p2align 4
+L(nibble_ashr_3):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfff8, %esi
+ jnz L(ashr_3_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $13, %ebp
+ jbe L(ashr_3_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_3)
+
+ .p2align 4
+L(ashr_3_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $3, %xmm0
+ psrldq $3, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_4
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
+ */
+ .p2align 4
+L(ashr_4):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $12, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -12(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $4, %ebx
+ lea 4(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_4):
+ add $16, %edi
+ jg L(nibble_ashr_4)
+
+L(gobble_ashr_4):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $4, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_4)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $4, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_4)
+
+ .p2align 4
+L(nibble_ashr_4):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfff0, %esi
+ jnz L(ashr_4_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $12, %ebp
+ jbe L(ashr_4_exittail)
+#endif
+
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_4)
+
+ .p2align 4
+L(ashr_4_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $4, %xmm0
+ psrldq $4, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_5
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(11~15) n -11 4(15 +(n-11) - n) ashr_5
+ */
+ .p2align 4
+L(ashr_5):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $11, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -11(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $5, %ebx
+ lea 5(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_5):
+ add $16, %edi
+ jg L(nibble_ashr_5)
+
+L(gobble_ashr_5):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $5, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_5)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $5, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_5)
+
+ .p2align 4
+L(nibble_ashr_5):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xffe0, %esi
+ jnz L(ashr_5_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $11, %ebp
+ jbe L(ashr_5_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_5)
+
+ .p2align 4
+L(ashr_5_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $5, %xmm0
+ psrldq $5, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_6
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(10~15) n -10 5(15 +(n-10) - n) ashr_6
+ */
+
+ .p2align 4
+L(ashr_6):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $10, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -10(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $6, %ebx
+ lea 6(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_6):
+ add $16, %edi
+ jg L(nibble_ashr_6)
+
+L(gobble_ashr_6):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $6, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_6)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $6, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_6)
+
+ .p2align 4
+L(nibble_ashr_6):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xffc0, %esi
+ jnz L(ashr_6_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $10, %ebp
+ jbe L(ashr_6_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_6)
+
+ .p2align 4
+L(ashr_6_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $6, %xmm0
+ psrldq $6, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_7
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
+ */
+
+ .p2align 4
+L(ashr_7):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $9, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -9(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $7, %ebx
+ lea 8(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_7):
+ add $16, %edi
+ jg L(nibble_ashr_7)
+
+L(gobble_ashr_7):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $7, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_7)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $7, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_7)
+
+ .p2align 4
+L(nibble_ashr_7):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xff80, %esi
+ jnz L(ashr_7_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $9, %ebp
+ jbe L(ashr_7_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_7)
+
+ .p2align 4
+L(ashr_7_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $7, %xmm0
+ psrldq $7, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_8
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
+ */
+ .p2align 4
+L(ashr_8):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $8, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -8(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $8, %ebx
+ lea 8(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_8):
+ add $16, %edi
+ jg L(nibble_ashr_8)
+
+L(gobble_ashr_8):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $8, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_8)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $8, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_8)
+
+ .p2align 4
+L(nibble_ashr_8):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xff00, %esi
+ jnz L(ashr_8_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $8, %ebp
+ jbe L(ashr_8_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_8)
+
+ .p2align 4
+L(ashr_8_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $8, %xmm0
+ psrldq $8, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_9
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
+ */
+ .p2align 4
+L(ashr_9):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $7, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -7(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $9, %ebx
+ lea 9(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_9):
+ add $16, %edi
+ jg L(nibble_ashr_9)
+
+L(gobble_ashr_9):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $9, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_9)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $9, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_9)
+
+ .p2align 4
+L(nibble_ashr_9):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfe00, %esi
+ jnz L(ashr_9_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ jbe L(ashr_9_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_9)
+
+ .p2align 4
+L(ashr_9_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $9, %xmm0
+ psrldq $9, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_10
+ * ecx(offset of eax) edi(offset of edx) relative offset corresponding case
+ * n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
+ */
+ .p2align 4
+L(ashr_10):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $6, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -6(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $10, %ebx
+ lea 10(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_10):
+ add $16, %edi
+ jg L(nibble_ashr_10)
+
+L(gobble_ashr_10):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $10, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_10)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $10, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_10)
+
+ .p2align 4
+L(nibble_ashr_10):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xfc00, %esi
+ jnz L(ashr_10_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ jbe L(ashr_10_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_10)
+
+ .p2align 4
+L(ashr_10_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $10, %xmm0
+ psrldq $10, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_11
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
+ */
+ .p2align 4
+L(ashr_11):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $5, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -5(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $11, %ebx
+ lea 11(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_11):
+ add $16, %edi
+ jg L(nibble_ashr_11)
+
+L(gobble_ashr_11):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $11, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_11)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $11, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_11)
+
+ .p2align 4
+L(nibble_ashr_11):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xf800, %esi
+ jnz L(ashr_11_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ jbe L(ashr_11_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_11)
+
+ .p2align 4
+L(ashr_11_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $11, %xmm0
+ psrldq $11, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_12
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
+ */
+ .p2align 4
+L(ashr_12):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $4, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -4(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $12, %ebx
+ lea 12(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_12):
+ add $16, %edi
+ jg L(nibble_ashr_12)
+
+L(gobble_ashr_12):
+	movdqa	(%eax, %ecx), %xmm1
+	movdqa	(%edx, %ecx), %xmm2
+	movdqa	%xmm2, %xmm4
+
+	palignr	$12, %xmm3, %xmm2
+
+	pcmpeqb	%xmm1, %xmm0
+	pcmpeqb	%xmm2, %xmm1
+	psubb	%xmm0, %xmm1
+	pmovmskb %xmm1, %esi
+	sub	$0xffff, %esi
+	jnz	L(exit)
+
+#ifdef USE_AS_STRNCMP /* decrement n before advancing, as in the other ashr_N loops */
+	cmp	$16, %ebp
+	lea	-16(%ebp), %ebp
+	jbe	L(more8byteseq)
+#endif
+	add	$16, %ecx
+	movdqa	%xmm4, %xmm3
+
+	add	$16, %edi
+	jg	L(nibble_ashr_12)
+
+	movdqa	(%eax, %ecx), %xmm1
+	movdqa	(%edx, %ecx), %xmm2
+	movdqa	%xmm2, %xmm4
+
+	palignr	$12, %xmm3, %xmm2
+
+	pcmpeqb	%xmm1, %xmm0
+	pcmpeqb	%xmm2, %xmm1
+	psubb	%xmm0, %xmm1
+	pmovmskb %xmm1, %esi
+	sub	$0xffff, %esi
+	jnz	L(exit)
+
+#ifdef USE_AS_STRNCMP
+	cmp	$16, %ebp
+	lea	-16(%ebp), %ebp
+	jbe	L(more8byteseq)
+#endif
+	add	$16, %ecx
+	movdqa	%xmm4, %xmm3
+	jmp	L(loop_ashr_12)
+
+ .p2align 4
+L(nibble_ashr_12):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xf000, %esi
+ jnz L(ashr_12_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ jbe L(ashr_12_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_12)
+
+ .p2align 4
+L(ashr_12_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $12, %xmm0
+ psrldq $12, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_13
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
+ */
+ .p2align 4
+L(ashr_13):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $3, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -3(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $13, %ebx
+ lea 13(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_13):
+ add $16, %edi
+ jg L(nibble_ashr_13)
+
+L(gobble_ashr_13):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $13, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_13)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $13, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_13)
+
+ .p2align 4
+L(nibble_ashr_13):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xe000, %esi
+ jnz L(ashr_13_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ jbe L(ashr_13_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_13)
+
+ .p2align 4
+L(ashr_13_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $13, %xmm0
+ psrldq $13, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_14
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
+ */
+ .p2align 4
+L(ashr_14):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $2, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -2(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $14, %ebx
+ lea 14(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_14):
+ add $16, %edi
+ jg L(nibble_ashr_14)
+
+L(gobble_ashr_14):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $14, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_14)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $14, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_14)
+
+ .p2align 4
+L(nibble_ashr_14):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0xc000, %esi
+ jnz L(ashr_14_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ jbe L(ashr_14_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_14)
+
+ .p2align 4
+L(ashr_14_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $14, %xmm0
+ psrldq $14, %xmm3
+ jmp L(aftertail)
+
+/*
+ * The following cases will be handled by ashr_15
+ * ecx(offset of esi) eax(offset of edi) relative offset corresponding case
+ * n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
+ */
+
+ .p2align 4
+L(ashr_15):
+ mov $0xffff, %esi
+ pxor %xmm0, %xmm0
+ movdqa (%edx), %xmm2
+ movdqa (%eax), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pslldq $1, %xmm2
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
+ pmovmskb %xmm2, %edi
+ shr %cl, %esi
+ shr %cl, %edi
+ sub %edi, %esi
+ lea -1(%ecx), %edi
+ jnz L(less32bytes)
+
+ UPDATE_STRNCMP_COUNTER
+
+ movdqa (%edx), %xmm3
+ pxor %xmm0, %xmm0
+ mov $16, %ecx
+ or $15, %ebx
+ lea 15(%edx), %edi
+ and $0xfff, %edi
+ sub $0x1000, %edi
+
+ .p2align 4
+L(loop_ashr_15):
+ add $16, %edi
+ jg L(nibble_ashr_15)
+
+L(gobble_ashr_15):
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $15, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+
+ add $16, %edi
+ jg L(nibble_ashr_15)
+
+ movdqa (%eax, %ecx), %xmm1
+ movdqa (%edx, %ecx), %xmm2
+ movdqa %xmm2, %xmm4
+
+ palignr $15, %xmm3, %xmm2
+
+ pcmpeqb %xmm1, %xmm0
+ pcmpeqb %xmm2, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ sub $0xffff, %esi
+ jnz L(exit)
+
+#ifdef USE_AS_STRNCMP
+ cmp $16, %ebp
+ lea -16(%ebp), %ebp
+ jbe L(more8byteseq)
+#endif
+ add $16, %ecx
+ movdqa %xmm4, %xmm3
+ jmp L(loop_ashr_15)
+
+ .p2align 4
+L(nibble_ashr_15):
+ pcmpeqb %xmm3, %xmm0
+ pmovmskb %xmm0, %esi
+ test $0x8000, %esi
+ jnz L(ashr_15_exittail)
+
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ jbe L(ashr_15_exittail)
+#endif
+ pxor %xmm0, %xmm0
+ sub $0x1000, %edi
+ jmp L(gobble_ashr_15)
+
+ .p2align 4
+L(ashr_15_exittail):
+ movdqa (%eax, %ecx), %xmm1
+ psrldq $15, %xmm0
+ psrldq $15, %xmm3
+ jmp L(aftertail)
+
+ .p2align 4
+L(aftertail):
+ pcmpeqb %xmm3, %xmm1
+ psubb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ not %esi
+L(exit):
+ mov %ebx, %edi
+ and $0x1f, %edi
+ lea -16(%edi, %ecx), %edi
+L(less32bytes):
+ add %edi, %edx
+ add %ecx, %eax
+ test $0x20, %ebx
+ jz L(ret2)
+ xchg %eax, %edx
+
+ .p2align 4
+L(ret2):
+ mov %esi, %ecx
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+L(less16bytes):
+ test %cl, %cl
+ jz L(2next_8_bytes)
+
+ test $0x01, %cl
+ jnz L(Byte0)
+
+ test $0x02, %cl
+ jnz L(Byte1)
+
+ test $0x04, %cl
+ jnz L(Byte2)
+
+ test $0x08, %cl
+ jnz L(Byte3)
+
+ test $0x10, %cl
+ jnz L(Byte4)
+
+ test $0x20, %cl
+ jnz L(Byte5)
+
+ test $0x40, %cl
+ jnz L(Byte6)
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ jbe L(eq)
+#endif
+
+ movzx 7(%eax), %ecx
+ movzx 7(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte0):
+#ifdef USE_AS_STRNCMP
+ cmp $0, %ebp
+ jbe L(eq)
+#endif
+ movzx (%eax), %ecx
+ movzx (%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte1):
+#ifdef USE_AS_STRNCMP
+ cmp $1, %ebp
+ jbe L(eq)
+#endif
+ movzx 1(%eax), %ecx
+ movzx 1(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte2):
+#ifdef USE_AS_STRNCMP
+ cmp $2, %ebp
+ jbe L(eq)
+#endif
+ movzx 2(%eax), %ecx
+ movzx 2(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte3):
+#ifdef USE_AS_STRNCMP
+ cmp $3, %ebp
+ jbe L(eq)
+#endif
+ movzx 3(%eax), %ecx
+ movzx 3(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte4):
+#ifdef USE_AS_STRNCMP
+ cmp $4, %ebp
+ jbe L(eq)
+#endif
+ movzx 4(%eax), %ecx
+ movzx 4(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte5):
+#ifdef USE_AS_STRNCMP
+ cmp $5, %ebp
+ jbe L(eq)
+#endif
+ movzx 5(%eax), %ecx
+ movzx 5(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(Byte6):
+#ifdef USE_AS_STRNCMP
+ cmp $6, %ebp
+ jbe L(eq)
+#endif
+ movzx 6(%eax), %ecx
+ movzx 6(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(2next_8_bytes):
+ add $8, %eax
+ add $8, %edx
+#ifdef USE_AS_STRNCMP
+ cmp $8, %ebp
+ lea -8(%ebp), %ebp
+ jbe L(eq)
+#endif
+
+ test $0x01, %ch
+ jnz L(Byte0)
+
+ test $0x02, %ch
+ jnz L(Byte1)
+
+ test $0x04, %ch
+ jnz L(Byte2)
+
+ test $0x08, %ch
+ jnz L(Byte3)
+
+ test $0x10, %ch
+ jnz L(Byte4)
+
+ test $0x20, %ch
+ jnz L(Byte5)
+
+ test $0x40, %ch
+ jnz L(Byte6)
+
+#ifdef USE_AS_STRNCMP
+ cmp $7, %ebp
+ jbe L(eq)
+#endif
+ movzx 7(%eax), %ecx
+ movzx 7(%edx), %eax
+
+ sub %ecx, %eax
+ RETURN
+
+L(neq):
+ mov $1, %eax
+ ja L(neq_bigger)
+ neg %eax
+L(neq_bigger):
+#ifdef USE_AS_STRNCMP
+ POP (%ebp)
+#endif
+ ret
+
+#ifdef USE_AS_STRNCMP
+ .p2align 4
+ cfi_restore_state
+L(more8byteseq):
+ POP (%esi)
+ POP (%edi)
+ POP (%ebx)
+#endif
+
+L(eq):
+
+#ifdef USE_AS_STRNCMP
+ POP (%ebp)
+#endif
+ xorl %eax, %eax
+ ret
+
+#ifdef USE_AS_STRNCMP
+ .p2align 4
+ CFI_PUSH (%ebp)
+L(less16bytes_sncmp):
+ test %ebp, %ebp
+ jz L(eq)
+
+ movzbl (%eax), %ecx
+ cmpb %cl, (%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $1, %ebp
+ je L(eq)
+
+ movzbl 1(%eax), %ecx
+ cmpb %cl, 1(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $2, %ebp
+ je L(eq)
+
+ movzbl 2(%eax), %ecx
+ cmpb %cl, 2(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $3, %ebp
+ je L(eq)
+
+ movzbl 3(%eax), %ecx
+ cmpb %cl, 3(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $4, %ebp
+ je L(eq)
+
+ movzbl 4(%eax), %ecx
+ cmpb %cl, 4(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $5, %ebp
+ je L(eq)
+
+ movzbl 5(%eax), %ecx
+ cmpb %cl, 5(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $6, %ebp
+ je L(eq)
+
+ movzbl 6(%eax), %ecx
+ cmpb %cl, 6(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $7, %ebp
+ je L(eq)
+
+ movzbl 7(%eax), %ecx
+ cmpb %cl, 7(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+
+ cmp $8, %ebp
+ je L(eq)
+
+ movzbl 8(%eax), %ecx
+ cmpb %cl, 8(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $9, %ebp
+ je L(eq)
+
+ movzbl 9(%eax), %ecx
+ cmpb %cl, 9(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $10, %ebp
+ je L(eq)
+
+ movzbl 10(%eax), %ecx
+ cmpb %cl, 10(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $11, %ebp
+ je L(eq)
+
+ movzbl 11(%eax), %ecx
+ cmpb %cl, 11(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+
+ cmp $12, %ebp
+ je L(eq)
+
+ movzbl 12(%eax), %ecx
+ cmpb %cl, 12(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $13, %ebp
+ je L(eq)
+
+ movzbl 13(%eax), %ecx
+ cmpb %cl, 13(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $14, %ebp
+ je L(eq)
+
+ movzbl 14(%eax), %ecx
+ cmpb %cl, 14(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ cmp $15, %ebp
+ je L(eq)
+
+ movzbl 15(%eax), %ecx
+ cmpb %cl, 15(%edx)
+ jne L(neq)
+ test %cl, %cl
+ je L(eq)
+
+ POP (%ebp)
+ xor %eax, %eax
+ ret
+#endif
+
+END (STRCMP)
+
+#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/strcmp.S b/libc/sysdeps/i386/i686/multiarch/strcmp.S
new file mode 100644
index 000000000..7136d47e8
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/strcmp.S
@@ -0,0 +1,115 @@
+/* Multiple versions of strcmp
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef USE_AS_STRNCMP
+# define STRCMP strcmp
+# define __GI_STRCMP __GI_strcmp
+# define __STRCMP_IA32 __strcmp_ia32
+# define __STRCMP_SSSE3 __strcmp_ssse3
+# define __STRCMP_SSE4_2 __strcmp_sse4_2
+#else
+# define STRCMP strncmp
+# define __GI_STRCMP __GI_strncmp
+# define __STRCMP_IA32 __strncmp_ia32
+# define __STRCMP_SSSE3 __strncmp_ssse3
+# define __STRCMP_SSE4_2 __strncmp_sse4_2
+#endif
+
+/* Define multiple versions only for the definition in libc. Don't
+ define multiple versions for strncmp in static library since we
+ need strncmp before the initialization happened. */
+#if (defined SHARED || !defined USE_AS_STRNCMP) && !defined NOT_IN_libc
+# ifdef SHARED
+ .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+ .globl __i686.get_pc_thunk.bx
+ .hidden __i686.get_pc_thunk.bx
+ .p2align 4
+ .type __i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+
+ .text
+ENTRY(STRCMP)
+ .type STRCMP, @gnu_indirect_function
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (ebx, 0)
+ call __i686.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+ jne 1f
+ call __init_cpu_features
+1: leal __STRCMP_IA32@GOTOFF(%ebx), %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __STRCMP_SSSE3@GOTOFF(%ebx), %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
+ jz 2f
+ leal __STRCMP_SSE4_2@GOTOFF(%ebx), %eax
+2: popl %ebx
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (ebx)
+ ret
+END(STRCMP)
+# else
+ .text
+ENTRY(STRCMP)
+ .type STRCMP, @gnu_indirect_function
+ cmpl $0, KIND_OFFSET+__cpu_features
+ jne 1f
+ call __init_cpu_features
+1: leal __STRCMP_IA32, %eax
+ testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+ jz 2f
+ leal __STRCMP_SSSE3, %eax
+ testl $bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
+ jz 2f
+ leal __STRCMP_SSE4_2, %eax
+2: ret
+END(STRCMP)
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type __STRCMP_IA32, @function; \
+ .p2align 4; \
+ __STRCMP_IA32: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size __STRCMP_IA32, .-__STRCMP_IA32
+
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+ they will be called without setting up EBX needed for PLT which is
+ used by IFUNC. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCMP; __GI_STRCMP = __STRCMP_IA32
+# endif
+#endif
+
+#ifndef USE_AS_STRNCMP
+# include "../strcmp.S"
+#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/strncmp-c.c b/libc/sysdeps/i386/i686/multiarch/strncmp-c.c
new file mode 100644
index 000000000..cc059da49
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/strncmp-c.c
@@ -0,0 +1,8 @@
+#ifdef SHARED
+# define STRNCMP __strncmp_ia32
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
+ __hidden_ver1 (__strncmp_ia32, __GI_strncmp, __strncmp_ia32);
+#endif
+
+#include "string/strncmp.c"
diff --git a/libc/sysdeps/i386/i686/multiarch/strncmp-sse4.S b/libc/sysdeps/i386/i686/multiarch/strncmp-sse4.S
new file mode 100644
index 000000000..cf14dfaf6
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/strncmp-sse4.S
@@ -0,0 +1,5 @@
+#ifdef SHARED
+# define USE_AS_STRNCMP
+# define STRCMP __strncmp_sse4_2
+# include "strcmp-sse4.S"
+#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/strncmp-ssse3.S b/libc/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
new file mode 100644
index 000000000..536c8685f
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/strncmp-ssse3.S
@@ -0,0 +1,5 @@
+#ifdef SHARED
+# define USE_AS_STRNCMP
+# define STRCMP __strncmp_ssse3
+# include "strcmp-ssse3.S"
+#endif
diff --git a/libc/sysdeps/i386/i686/multiarch/strncmp.S b/libc/sysdeps/i386/i686/multiarch/strncmp.S
new file mode 100644
index 000000000..b6814315f
--- /dev/null
+++ b/libc/sysdeps/i386/i686/multiarch/strncmp.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNCMP
+#define STRCMP strncmp
+#include "strcmp.S"
diff --git a/libc/sysdeps/i386/lshift.S b/libc/sysdeps/i386/lshift.S
index 536d9878e..398cf038c 100644
--- a/libc/sysdeps/i386/lshift.S
+++ b/libc/sysdeps/i386/lshift.S
@@ -1,5 +1,5 @@
/* i80386 __mpn_lshift --
- Copyright (C) 1992, 1994, 1997-2000, 2005 Free Software Foundation, Inc.
+ Copyright (C) 1992,1994,1997-2000,2005,2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -55,6 +55,7 @@ ENTRY (BP_SYM (__mpn_lshift))
movl (%esi,%edx,4),%ebx /* read most significant limb */
cfi_rel_offset (ebx, 0)
+ cfi_remember_state
xorl %eax,%eax
shldl %cl,%ebx,%eax /* compute carry limb */
decl %edx
@@ -95,6 +96,7 @@ L(1): movl (%esi,%edx,4),%eax
LEAVE
ret
+ cfi_restore_state
L(end): shll %cl,%ebx /* compute least significant limb */
movl %ebx,(%edi) /* store it */
diff --git a/libc/sysdeps/i386/rshift.S b/libc/sysdeps/i386/rshift.S
index 3fd0afe82..332c4d09e 100644
--- a/libc/sysdeps/i386/rshift.S
+++ b/libc/sysdeps/i386/rshift.S
@@ -1,5 +1,5 @@
/* i80386 __mpn_rshift --
- Copyright (C) 1992,1994,1997-2000,2005 Free Software Foundation, Inc.
+ Copyright (C) 1992,1994,1997-2000,2005,2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
@@ -57,6 +57,7 @@ ENTRY (BP_SYM (__mpn_rshift))
movl (%esi,%edx,4),%ebx /* read least significant limb */
cfi_rel_offset (ebx, 0)
+ cfi_remember_state
xorl %eax,%eax
shrdl %cl,%ebx,%eax /* compute carry limb */
incl %edx
@@ -97,10 +98,7 @@ L(1): movl (%esi,%edx,4),%eax
LEAVE
ret
- cfi_adjust_cfa_offset (12)
- cfi_rel_offset (edi, 8)
- cfi_rel_offset (esi, 4)
- cfi_rel_offset (ebx, 0)
+ cfi_restore_state
L(end): shrl %cl,%ebx /* compute most significant limb */
movl %ebx,(%edi) /* store it */
diff --git a/libc/sysdeps/ia64/fpu/fegetenv.c b/libc/sysdeps/ia64/fpu/fegetenv.c
index 5446b1649..e240f75e4 100644
--- a/libc/sysdeps/ia64/fpu/fegetenv.c
+++ b/libc/sysdeps/ia64/fpu/fegetenv.c
@@ -27,3 +27,4 @@ fegetenv (fenv_t *envp)
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/libc/sysdeps/ia64/memchr.S b/libc/sysdeps/ia64/memchr.S
index cdd71ca5a..355effe94 100644
--- a/libc/sysdeps/ia64/memchr.S
+++ b/libc/sysdeps/ia64/memchr.S
@@ -1,6 +1,6 @@
/* Optimized version of the standard memchr() function.
This file is part of the GNU C Library.
- Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2003, 2010 Free Software Foundation, Inc.
Contributed by Dan Pop <Dan.Pop@cern.ch>.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,9 +21,9 @@
/* Return: the address of the first occurence of chr in str or NULL
Inputs:
- in0: str
- in1: chr
- in2: byte count
+ in0: str
+ in1: chr
+ in2: byte count
This implementation assumes little endian mode. For big endian mode,
the instruction czx1.r should be replaced by czx1.l.
@@ -47,7 +47,7 @@
#define saved_lc r16
#define chr r17
#define len r18
-#define pos0 r20
+#define last r20
#define val r21
#define tmp r24
#define chrx8 r25
@@ -62,12 +62,13 @@ ENTRY(__memchr)
.rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2]
.rotp p[MEMLAT+3]
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
+ mov saved_lc = ar.lc // save the loop counter
.save pr, saved_pr
mov saved_pr = pr // save the predicates
.body
- mov ret0 = str
- and tmp = 7, str // tmp = str % 8
+ mov ret0 = str
+ add last = str, in2 // last byte
+ and tmp = 7, str // tmp = str % 8
cmp.ne p7, p0 = r0, r0 // clear p7
extr.u chr = in1, 0, 8 // chr = (unsigned char) in1
mov len = in2
@@ -88,7 +89,7 @@ ENTRY(__memchr)
.str_aligned:
cmp.ne p6, p0 = r0, r0 // clear p6
shr.u loopcnt = len, 3 // loopcnt = len / 8
- and len = 7, len ;; // remaining len = len & 7
+ and len = 7, len ;; // remaining len = len & 7
adds loopcnt = -1, loopcnt
mov ar.ec = MEMLAT + 3
mux1 chrx8 = chr, @brcst ;; // get a word full of chr
@@ -119,7 +120,7 @@ ENTRY(__memchr)
mov ret0 = r0 ;; // return NULL
.foundit:
.pred.rel "mutex" p6, p7
-(p6) adds ret0 = -1, ret0 // if we got here from l1 or l3
+(p6) adds ret0 = -1, ret0 // if we got here from l1 or l3
(p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2
mov pr = saved_pr, -1
mov ar.lc = saved_lc
@@ -143,7 +144,10 @@ ENTRY(__memchr)
ld8 tmp = [ret0];; // load the first unchecked 8byte
xor aux[1] = tmp, chrx8;;
czx1.r poschr[1] = aux[1];;
- cmp.ne p7, p0 = 8, poschr[1]
+ cmp.ne p7, p0 = 8, poschr[1];;
+(p7) add ret0 = addr[MEMLAT+2], poschr[1];;
+(p7) cmp.geu p6, p7 = ret0, last // don't go over the last byte
+(p6) br.cond.spnt .notfound;;
(p7) br.cond.spnt .foundit;;
adds ret0 = 8, ret0 // load the next unchecked 8byte
br.sptk .l4;;
diff --git a/libc/sysdeps/powerpc/fpu/fegetenv.c b/libc/sysdeps/powerpc/fpu/fegetenv.c
index 53953454c..3d21abb52 100644
--- a/libc/sysdeps/powerpc/fpu/fegetenv.c
+++ b/libc/sysdeps/powerpc/fpu/fegetenv.c
@@ -35,4 +35,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, BP_SYM (__old_fegetenv), BP_SYM (fegetenv), GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, BP_SYM (__fegetenv), BP_SYM (fegetenv), GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/configure b/libc/sysdeps/powerpc/powerpc32/configure
index 9b76c5788..da8ec0b87 100644
--- a/libc/sysdeps/powerpc/powerpc32/configure
+++ b/libc/sysdeps/powerpc/powerpc32/configure
@@ -25,11 +25,10 @@ rm -f conftest*
fi
{ $as_echo "$as_me:$LINENO: result: $libc_cv_ppc_rel16" >&5
$as_echo "$libc_cv_ppc_rel16" >&6; }
-if test $libc_cv_ppc_rel16 = yes; then
- cat >>confdefs.h <<\_ACEOF
-#define HAVE_ASM_PPC_REL16 1
-_ACEOF
-
+if test $libc_cv_ppc_rel16 = no; then
+ { { $as_echo "$as_me:$LINENO: error: R_PPC_REL16 is not supported. Binutils is too old." >&5
+$as_echo "$as_me: error: R_PPC_REL16 is not supported. Binutils is too old." >&2;}
+ { (exit 1); exit 1; }; }
fi
# See whether GCC uses -msecure-plt.
diff --git a/libc/sysdeps/powerpc/powerpc32/configure.in b/libc/sysdeps/powerpc/powerpc32/configure.in
index 7219ad993..21d3f5ee5 100644
--- a/libc/sysdeps/powerpc/powerpc32/configure.in
+++ b/libc/sysdeps/powerpc/powerpc32/configure.in
@@ -13,8 +13,8 @@ else
libc_cv_ppc_rel16=no
fi
rm -f conftest*])
-if test $libc_cv_ppc_rel16 = yes; then
- AC_DEFINE(HAVE_ASM_PPC_REL16)
+if test $libc_cv_ppc_rel16 = no; then
+ AC_MSG_ERROR(R_PPC_REL16 is not supported. Binutils is too old.)
fi
# See whether GCC uses -msecure-plt.
diff --git a/libc/sysdeps/powerpc/powerpc32/dl-machine.h b/libc/sysdeps/powerpc/powerpc32/dl-machine.h
index 6f8d0f506..5351d9691 100644
--- a/libc/sysdeps/powerpc/powerpc32/dl-machine.h
+++ b/libc/sysdeps/powerpc/powerpc32/dl-machine.h
@@ -41,16 +41,13 @@ static inline Elf32_Addr * __attribute__ ((const))
ppc_got (void)
{
Elf32_Addr *got;
-#ifdef HAVE_ASM_PPC_REL16
+
asm ("bcl 20,31,1f\n"
"1: mflr %0\n"
" addis %0,%0,_GLOBAL_OFFSET_TABLE_-1b@ha\n"
" addi %0,%0,_GLOBAL_OFFSET_TABLE_-1b@l\n"
: "=b" (got) : : "lr");
-#else
- asm (" bl _GLOBAL_OFFSET_TABLE_-4@local"
- : "=l" (got));
-#endif
+
return got;
}
diff --git a/libc/sysdeps/powerpc/powerpc32/dl-start.S b/libc/sysdeps/powerpc/powerpc32/dl-start.S
index c77c4de19..ae41f47ed 100644
--- a/libc/sysdeps/powerpc/powerpc32/dl-start.S
+++ b/libc/sysdeps/powerpc/powerpc32/dl-start.S
@@ -47,15 +47,10 @@ _dl_start_user:
passed by value!). */
/* Put our GOT pointer in r31, */
-#ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r31
addis r31,r31,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r31,r31,_GLOBAL_OFFSET_TABLE_-1b@l
-#else
- bl _GLOBAL_OFFSET_TABLE_-4@local
- mflr r31
-#endif
/* the address of _start in r30, */
mr r30,r3
/* &_dl_argc in 29, &_dl_argv in 27, and _dl_loaded in 28. */
diff --git a/libc/sysdeps/powerpc/powerpc32/elf/start.S b/libc/sysdeps/powerpc/powerpc32/elf/start.S
index a8abdca0c..dc89a5e10 100644
--- a/libc/sysdeps/powerpc/powerpc32/elf/start.S
+++ b/libc/sysdeps/powerpc/powerpc32/elf/start.S
@@ -53,10 +53,6 @@ L(start_addresses):
ASM_SIZE_DIRECTIVE(L(start_addresses))
.section ".text"
-#if defined PIC && !defined HAVE_ASM_PPC_REL16
-L(start_addressesp):
- .long L(start_addresses)-L(branch)
-#endif
ENTRY(_start)
/* Save the stack pointer, in case we're statically linked under Linux. */
mr r9,r1
@@ -77,16 +73,10 @@ L(branch):
start_addresses in r8. Also load the GOT pointer so that new PLT
calls work, like the one to __libc_start_main. */
#ifdef PIC
-# ifdef HAVE_ASM_PPC_REL16
addis r30,r13,_GLOBAL_OFFSET_TABLE_-L(branch)@ha
addis r8,r13,L(start_addresses)-L(branch)@ha
addi r30,r30,_GLOBAL_OFFSET_TABLE_-L(branch)@l
lwzu r13,L(start_addresses)-L(branch)@l(r8)
-# else
- lwz r8,L(start_addressesp)-L(branch)(r13)
- add r8,r13,r8
- lwz r13,0(r8)
-# endif
#else
lis r8,L(start_addresses)@ha
lwzu r13,L(start_addresses)@l(r8)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
index 04ed6da68..e1ac064a5 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
@@ -34,15 +34,10 @@ ENTRY (BP_SYM (__longjmp))
# ifdef PIC
mflr r6
cfi_register (lr,r6)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r5
addis r5,r5,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r5,r5,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r5
-# endif
# ifdef SHARED
lwz r5,_rtld_global_ro@got(r5)
mtlr r6
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_ceil.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
index bc74d302f..80e72ca2b 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
@@ -31,17 +31,10 @@ ENTRY (__ceil)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
index 47a75ec0c..ce6d71e4f 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
@@ -30,17 +30,10 @@ ENTRY (__ceilf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_floor.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_floor.S
index a29e4791e..0dd0dbe6c 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_floor.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_floor.S
@@ -31,17 +31,10 @@ ENTRY (__floor)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_floorf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
index 99fbdc5f8..98a47458b 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
@@ -30,17 +30,10 @@ ENTRY (__floorf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
index d73749e13..3bf1ffaea 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
@@ -45,17 +45,10 @@ ENTRY (__lround)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp10,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp10,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index c8dca313a..93133718a 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -33,17 +33,10 @@ ENTRY (__rint)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index 7771cb2bc..1e0fbb1f0 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -29,17 +29,10 @@ ENTRY (__rintf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_round.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_round.S
index 590c87ad8..48b346e65 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_round.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_round.S
@@ -43,16 +43,10 @@ ENTRY (__round)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
addi r9,r9,.LC0-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
-# endif
mtlr r11
cfi_same_value (lr)
lfs fp13,0(r9)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index 7e99bca31..88125aad0 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -42,16 +42,10 @@ ENTRY (__roundf )
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
addi r9,r9,.LC0-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
-# endif
mtlr r11
cfi_same_value (lr)
lfs fp13,0(r9)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_trunc.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
index 5bc0856b9..c3c021716 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
@@ -38,17 +38,10 @@ ENTRY (__trunc)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_truncf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
index e2e3bd674..eddef070c 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
@@ -37,17 +37,10 @@ ENTRY (__truncf)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
lfs fp13,.LC0-1b@l(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
- lfs fp13,0(r9)
-# endif
mtlr r11
cfi_same_value (lr)
#else
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
index b7d1abc00..131e7a332 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
@@ -85,15 +85,10 @@ ENTRY (BP_SYM (__sigsetjmp))
# ifdef PIC
mflr r6
cfi_register(lr,r6)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r5
addis r5,r5,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r5,r5,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r5
-# endif
mtlr r6
cfi_same_value (lr)
# ifdef SHARED
diff --git a/libc/sysdeps/powerpc/powerpc32/memset.S b/libc/sysdeps/powerpc/powerpc32/memset.S
index 454abb2b6..b4ce218e2 100644
--- a/libc/sysdeps/powerpc/powerpc32/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/memset.S
@@ -256,17 +256,10 @@ L(checklinesize):
beq L(medium)
/* Establishes GOT addressability so we can load __cache_line_size
from static. This value was set from the aux vector during startup. */
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr rGOT
addis rGOT,rGOT,__cache_line_size-1b@ha
lwz rCLS,__cache_line_size-1b@l(rGOT)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr rGOT
- lwz rGOT,__cache_line_size@got(rGOT)
- lwz rCLS,0(rGOT)
-# endif
mtlr rTMP
#else
/* Load __cache_line_size from static. This value was set from the
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
index e10a37977..b03e041d8 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -53,16 +53,10 @@ ENTRY (__llround)
#ifdef SHARED
mflr r11
cfi_register(lr,r11)
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r9
addis r9,r9,.LC0-1b@ha
addi r9,r9,.LC0-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r10
- lwz r9,.LC0@got(10)
-# endif
mtlr r11
cfi_same_value (lr)
lfd fp9,0(r9)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
index 95a0b3915..8be3cf184 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
@@ -63,7 +63,6 @@ EALIGN (__sqrt, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrt, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
index c31555194..9fa282c16 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
@@ -63,7 +63,6 @@ EALIGN (__sqrtf, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrtf, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S b/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
index 105b5912a..27a1a0dcb 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
@@ -63,7 +63,6 @@ EALIGN (__sqrt, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrt, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S b/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
index 14bc0a2ce..891485554 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
@@ -63,7 +63,6 @@ EALIGN (__sqrtf, 5, 0)
cfi_offset(lr,20-16)
cfi_offset(r30,8-16)
#ifdef SHARED
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,.LCF1
.LCF1:
mflr r30
@@ -71,12 +70,6 @@ EALIGN (__sqrtf, 5, 0)
addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l
lwz r9,_LIB_VERSION@got(30)
lwz r0,0(r9)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r30
- lwz r9,_LIB_VERSION@got(30)
- lwz r0,0(r9)
-# endif
#else
lis r9,_LIB_VERSION@ha
lwz r0,_LIB_VERSION@l(r9)
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/Implies b/libc/sysdeps/powerpc/powerpc32/power7/Implies
deleted file mode 100644
index 03899d8a3..000000000
--- a/libc/sysdeps/powerpc/powerpc32/power7/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc32/power5
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/Implies b/libc/sysdeps/powerpc/powerpc32/power7/fpu/Implies
deleted file mode 100644
index 819a7d797..000000000
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc32/power5/fpu
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
new file mode 100644
index 000000000..5b0d950c7
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
@@ -0,0 +1,89 @@
+/* finite(). PowerPC32/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __finite(x) */
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 3
+.LC0: /* 1.0 */
+ .quad 0x3ff0000000000000
+
+ .section ".text"
+ .type __finite, @function
+ .machine power7
+ENTRY (__finite)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ lfd fp0,.LC0-1b@l(r9)
+
+ mtlr r11
+ cfi_same_value (lr)
+#else
+ lis r9,.LC0@ha
+ lfd fp0,.LC0@l(r9)
+#endif
+ ftdiv cr7,fp1,fp0
+ li r3,1
+ bflr 30
+
+ /* We have -INF/+INF/NaN or a denormal. */
+
+ stwu r1,-16(r1) /* Allocate stack space. */
+ stfd fp1,8(r1) /* Transfer FP to GPR's. */
+
+ ori 2,2,0 /* Force a new dispatch group. */
+ lhz r0,8(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ clrlwi r0,r0,17 /* r0 = abs(r0). */
+ addi r1,r1,16 /* Reset the stack pointer. */
+ cmpwi cr7,r0,0x7ff0 /* r4 == 0x7ff0?. */
+ bltlr cr7 /* LT means we have a denormal. */
+ li r3,0
+ blr
+ END (__finite)
+
+hidden_def (__finite)
+weak_alias (__finite, finite)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__finite, __finitef)
+hidden_def (__finitef)
+weak_alias (__finitef, finitef)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__finite, __finitel)
+weak_alias (__finite, finitel)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S
new file mode 100644
index 000000000..54bd94176
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finitef.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_finite.S. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
new file mode 100644
index 000000000..297953491
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
@@ -0,0 +1,88 @@
+/* isinf(). PowerPC32/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isinf(x) */
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 3
+.LC0: /* 1.0 */
+ .quad 0x3ff0000000000000
+
+ .section ".text"
+ .type __isinf, @function
+ .machine power7
+ENTRY (__isinf)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ lfd fp0,.LC0-1b@l(r9)
+
+ mtlr r11
+ cfi_same_value (lr)
+#else
+ lis r9,.LC0@ha
+ lfd fp0,.LC0@l(r9)
+#endif
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 29 /* If not INF, return. */
+
+ /* Either we have -INF/+INF or a denormal. */
+
+ stwu r1,-16(r1) /* Allocate stack space. */
+ stfd fp1,8(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ lhz r4,8(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ addi r1,r1,16 /* Reset the stack pointer. */
+ cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
+ li r3,1
+ beqlr cr7 /* EQ means INF, otherwise -INF. */
+ li r3,-1
+ blr
+ END (__isinf)
+
+hidden_def (__isinf)
+weak_alias (__isinf, isinf)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isinf, __isinff)
+hidden_def (__isinff)
+weak_alias (__isinff, isinff)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S
new file mode 100644
index 000000000..be759e091
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinff.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isinf.S. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
new file mode 100644
index 000000000..852539f24
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
@@ -0,0 +1,92 @@
+/* isnan(). PowerPC32/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isnan(x) */
+ .section .rodata.cst8,"aM",@progbits,8
+ .align 3
+.LC0: /* 1.0 */
+ .quad 0x3ff0000000000000
+
+ .section ".text"
+ .type __isnan, @function
+ .machine power7
+ENTRY (__isnan)
+#ifdef SHARED
+ mflr r11
+ cfi_register(lr,r11)
+
+ bcl 20,31,1f
+1: mflr r9
+ addis r9,r9,.LC0-1b@ha
+ lfd fp0,.LC0-1b@l(r9)
+
+ mtlr r11
+ cfi_same_value (lr)
+#else
+ lis r9,.LC0@ha
+ lfd fp0,.LC0@l(r9)
+#endif
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 30 /* If not NaN or Inf, finish. */
+
+ /* We have -INF/+INF/NaN or a denormal. */
+
+ stwu r1,-16(r1) /* Allocate stack space. */
+ stfd fp1,8(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ lwz r4,8(r1) /* Load the upper half of the FP value. */
+ lwz r5,12(r1) /* Load the lower half of the FP value. */
+ addi r1,r1,16 /* Reset the stack pointer. */
+ lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */
+ clrlwi r4,r4,1 /* r4 = abs(r4). */
+ cmpw cr7,r4,r0 /* if (abs(r4) <= inf). */
+ cmpwi cr6,r5,0 /* r5 == 0x00000000? */
+ bltlr cr7 /* LT means we have a denormal. */
+ bgt cr7,L(NaN) /* GT means we have a NaN. */
+ beqlr cr6 /* EQ means we have +/-INF. */
+L(NaN):
+ li r3,1 /* x == NaN? */
+ blr
+ END (__isnan)
+
+hidden_def (__isnan)
+weak_alias (__isnan, isnan)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isnan, __isnanf)
+hidden_def (__isnanf)
+weak_alias (__isnanf, isnanf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isnan, __isnanl)
+weak_alias (__isnan, isnanl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
+compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
+# endif
+#endif
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
new file mode 100644
index 000000000..b48c85e0d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnanf.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isnan.S. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
new file mode 100644
index 000000000..e3dfd2ff9
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -0,0 +1,469 @@
+/* Optimized memcpy implementation for PowerPC32/POWER7.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'. */
+
+ .machine power7
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT
+
+ stwu 1,-32(1)
+ cfi_adjust_cfa_offset(32)
+ stw 30,20(1)
+ cfi_offset(30,(20-32))
+ stw 31,24(1)
+ mr 30,3
+ cmplwi cr1,5,31
+ neg 0,3
+ cfi_offset(31,-8)
+ ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
+ code. */
+
+ andi. 11,3,7 /* Check alignment of DST. */
+ clrlwi 10,4,29 /* Check alignment of SRC. */
+ cmplw cr6,10,11 /* SRC and DST alignments match? */
+ mr 12,4
+ mr 31,5
+ bne cr6,L(copy_GE_32_unaligned)
+
+ srwi 9,5,3 /* Number of full quadwords remaining. */
+
+ beq L(copy_GE_32_aligned_cont)
+
+ clrlwi 0,0,29
+ mtcrf 0x01,0
+ subf 31,0,5
+
+ /* Get the SRC aligned to 8 bytes. */
+
+1: bf 31,2f
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: bf 30,4f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+4: bf 29,0f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+0:
+ clrlwi 10,12,29 /* Check alignment of SRC again. */
+ srwi 9,31,3 /* Number of full doublewords remaining. */
+
+L(copy_GE_32_aligned_cont):
+
+ clrlwi 11,31,29
+ mtcrf 0x01,9
+
+ srwi 8,31,5
+ cmplwi cr1,9,4
+ cmplwi cr6,11,0
+ mr 11,12
+
+ /* Copy 1~3 doublewords so the main loop starts
+ at a multiple of 32 bytes. */
+
+ bf 30,1f
+ lfd 6,0(12)
+ lfd 7,8(12)
+ addi 11,12,16
+ mtctr 8
+ stfd 6,0(3)
+ stfd 7,8(3)
+ addi 10,3,16
+ bf 31,4f
+ lfd 0,16(12)
+ stfd 0,16(3)
+ blt cr1,3f
+ addi 11,12,24
+ addi 10,3,24
+ b 4f
+
+ .align 4
+1: /* Copy 1 doubleword and set the counter. */
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ lfd 6,0(12)
+ addi 11,12,8
+ stfd 6,0(3)
+ addi 10,3,8
+
+ .align 4
+4: /* Main aligned copy loop. Copies 32-bytes at a time. */
+ lfd 6,0(11)
+ lfd 7,8(11)
+ lfd 8,16(11)
+ lfd 0,24(11)
+ addi 11,11,32
+
+ stfd 6,0(10)
+ stfd 7,8(10)
+ stfd 8,16(10)
+ stfd 0,24(10)
+ addi 10,10,32
+ bdnz 4b
+3:
+
+ /* Check for tail bytes. */
+
+ clrrwi 0,31,3
+ mtcrf 0x01,31
+ beq cr6,0f
+
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+ /* At this point we have a tail of 0-7 bytes and we know that the
+ destination is doubleword-aligned. */
+4: /* Copy 4 bytes. */
+ bf 29,2f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Copy 2 bytes. */
+ bf 30,1f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,0(12)
+ stb 6,0(3)
+0: /* Return original DST pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+
+ /* Handle copies of 0~31 bytes. */
+ .align 4
+L(copy_LT_32):
+ cmplwi cr6,5,8
+ mr 12,4
+ mtcrf 0x01,5
+ ble cr6,L(copy_LE_8)
+
+ /* At least 9 bytes to go. */
+ neg 8,4
+ clrrwi 11,4,2
+ andi. 0,8,3
+ cmplwi cr1,5,16
+ mr 10,5
+ beq L(copy_LT_32_aligned)
+
+ /* Force 4-bytes alignment for SRC. */
+ mtocrf 0x01,0
+ subf 10,0,5
+2: bf 30,1f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,L(end_4bytes_alignment)
+
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+
+ .align 4
+L(end_4bytes_alignment):
+ cmplwi cr1,10,16
+ mtcrf 0x01,10
+
+L(copy_LT_32_aligned):
+ /* At least 6 bytes to go, and SRC is word-aligned. */
+ blt cr1,8f
+
+ /* Copy 16 bytes. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ stw 6,0(3)
+ lwz 8,8(12)
+ stw 7,4(3)
+ lwz 6,12(12)
+ addi 12,12,16
+ stw 8,8(3)
+ stw 6,12(3)
+ addi 3,3,16
+8: /* Copy 8 bytes. */
+ bf 28,4f
+
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Copy 4 bytes. */
+ bf 29,2f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Copy 2-3 bytes. */
+ bf 30,1f
+
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 7,2(12)
+ stb 7,2(3)
+
+ /* Return original DST pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+ .align 4
+1: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,0(12)
+ stb 6,0(3)
+0: /* Return original DST pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+ /* Handles copies of 0~8 bytes. */
+ .align 4
+L(copy_LE_8):
+ bne cr6,4f
+
+ /* Though we could've used lfd/stfd here, they are still
+ slow for unaligned cases. */
+
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+
+ /* Return original DST pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+ .align 4
+4: /* Copies 4~7 bytes. */
+ bf 29,2b
+
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,5f
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,0f
+ lbz 8,6(4)
+ stb 8,6(3)
+
+ /* Return original DST pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+ .align 4
+5: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,4(4)
+ stb 6,4(3)
+
+0: /* Return original DST pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ addi 1,1,32
+ blr
+
+ /* Handle copies of 32+ bytes where DST is aligned (to quadword) but
+ SRC is not. Use aligned quadword loads from SRC, shifted to realign
+ the data, allowing for aligned DST stores. */
+ .align 4
+L(copy_GE_32_unaligned):
+ andi. 11,3,15 /* Check alignment of DST. */
+ clrlwi 0,0,28 /* Number of bytes until the 1st
+ quadword of DST. */
+ srwi 9,5,4 /* Number of full quadwords remaining. */
+
+ beq L(copy_GE_32_unaligned_cont)
+
+ /* SRC is not quadword aligned, get it aligned. */
+
+ mtcrf 0x01,0
+ subf 31,0,5
+
+ /* Vector instructions work best when proper alignment (16-bytes)
+ is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
+1: /* Copy 1 byte. */
+ bf 31,2f
+
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: /* Copy 2 bytes. */
+ bf 30,4f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+4: /* Copy 4 bytes. */
+ bf 29,8f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+8: /* Copy 8 bytes. */
+ bf 28,0f
+
+ lfd 6,0(12)
+ addi 12,12,8
+ stfd 6,0(3)
+ addi 3,3,8
+0:
+ clrlwi 10,12,28 /* Check alignment of SRC. */
+ srdi 9,31,4 /* Number of full quadwords remaining. */
+
+ /* The proper alignment is present, it is OK to copy the bytes now. */
+L(copy_GE_32_unaligned_cont):
+
+ /* Setup two indexes to speed up the indexed vector operations. */
+ clrlwi 11,31,28
+ li 6,16 /* Index for 16-bytes offsets. */
+ li 7,32 /* Index for 32-bytes offsets. */
+ cmplwi cr1,11,0
+ srdi 8,31,5 /* Setup the loop counter. */
+ mr 10,3
+ mr 11,12
+ mtcrf 0x01,9
+ cmplwi cr6,9,1
+ lvsl 5,0,12
+ lvx 3,0,12
+ bf 31,L(setup_unaligned_loop)
+
+ /* Copy another 16 bytes to align to 32-bytes due to the loop . */
+ lvx 4,12,6
+ vperm 6,3,4,5
+ addi 11,12,16
+ addi 10,3,16
+ stvx 6,0,3
+ vor 3,4,4
+
+L(setup_unaligned_loop):
+ mtctr 8
+ ble cr6,L(end_unaligned_loop)
+
+ /* Copy 32 bytes at a time using vector instructions. */
+ .align 4
+L(unaligned_loop):
+
+ /* Note: vr6/vr10 may contain data that was already copied,
+ but in order to get proper alignment, we may have to copy
+ some portions again. This is faster than having unaligned
+ vector instructions though. */
+
+ lvx 4,11,6 /* vr4 = r11+16. */
+ vperm 6,3,4,5 /* Merge the correctly-aligned portions
+ of vr3/vr4 into vr6. */
+ lvx 3,11,7 /* vr3 = r11+32. */
+ vperm 10,4,3,5 /* Merge the correctly-aligned portions
+ of vr3/vr4 into vr10. */
+ addi 11,11,32
+ stvx 6,0,10
+ stvx 10,10,6
+ addi 10,10,32
+
+ bdnz L(unaligned_loop)
+
+ .align 4
+L(end_unaligned_loop):
+
+ /* Check for tail bytes. */
+ clrrwi 0,31,4
+ mtcrf 0x01,31
+ beq cr1,0f
+
+ add 3,3,0
+ add 12,12,0
+
+ /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
+8: /* Copy 8 bytes. */
+ bf 28,4f
+
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Copy 4 bytes. */
+ bf 29,2f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Copy 2~3 bytes. */
+ bf 30,1f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,0(12)
+ stb 6,0(3)
+0: /* Return original DST pointer. */
+ mr 3,30
+ lwz 30,20(1)
+ lwz 31,24(1)
+ addi 1,1,32
+ blr
+
+END (BP_SYM (memcpy))
+libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/Implies b/libc/sysdeps/powerpc/powerpc64/power7/Implies
deleted file mode 100644
index 13b03309f..000000000
--- a/libc/sysdeps/powerpc/powerpc64/power7/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power5
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/Implies b/libc/sysdeps/powerpc/powerpc64/power7/fpu/Implies
deleted file mode 100644
index 13b03309f..000000000
--- a/libc/sysdeps/powerpc/powerpc64/power7/fpu/Implies
+++ /dev/null
@@ -1 +0,0 @@
-powerpc/powerpc64/power5
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
new file mode 100644
index 000000000..6763d1adc
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
@@ -0,0 +1,68 @@
+/* finite(). PowerPC64/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __finite(x) */
+ .section ".toc","aw"
+.LC0: /* 1.0 */
+ .tc FD_ONE[TC],0x3ff0000000000000
+ .section ".text"
+ .type __finite, @function
+ .machine power7
+EALIGN (__finite, 4, 0)
+ CALL_MCOUNT 0
+ lfd fp0,.LC0@toc(r2)
+ ftdiv cr7,fp1,fp0
+ li r3,1
+ bflr 30
+
+ /* If we are here, we either have +/-INF,
+ NaN or denormal. */
+
+ stfd fp1,-16(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+
+ lhz r4,-16(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ clrlwi r4,r4,17 /* r4 = abs(r4). */
+ cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
+ bltlr cr7 /* LT means finite, other non-finite. */
+ li r3,0
+ blr
+ END (__finite)
+
+hidden_def (__finite)
+weak_alias (__finite, finite)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__finite, __finitef)
+hidden_def (__finitef)
+weak_alias (__finitef, finitef)
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __finite, __finitel, GLIBC_2_0);
+compat_symbol (libc, finite, finitel, GLIBC_2_0);
+# endif
+#endif
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
new file mode 100644
index 000000000..54bd94176
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finitef.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_finite.S. */
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
new file mode 100644
index 000000000..f896d3802
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
@@ -0,0 +1,71 @@
+/* isinf(). PowerPC64/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isinf(x) */
+ .section ".toc","aw"
+.LC0: /* 1.0 */
+ .tc FD_ONE[TC],0x3ff0000000000000
+ .section ".text"
+ .type __isinf, @function
+ .machine power7
+EALIGN (__isinf, 4, 0)
+ CALL_MCOUNT 0
+ lfd fp0,.LC0@toc(r2)
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 29 /* If not INF, return. */
+
+ /* Either we have -INF/+INF or a denormal. */
+
+ stfd fp1,-16(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ lhz r4,-16(r1) /* Fetch the upper portion of the high word of
+ the FP value (where the exponent and sign bits
+ are). */
+ cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
+ li r3,1
+ beqlr cr7 /* EQ means INF, otherwise -INF. */
+ li r3,-1
+ blr
+ END (__isinf)
+
+hidden_def (__isinf)
+weak_alias (__isinf, isinf)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isinf, __isinff)
+hidden_def (__isinff)
+weak_alias (__isinff, isinff)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isinf, __isinfl)
+weak_alias (__isinf, isinfl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isinf, __isinfl, GLIBC_2_0);
+compat_symbol (libc, isinf, isinfl, GLIBC_2_0);
+# endif
+#endif
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
new file mode 100644
index 000000000..be759e091
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinff.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isinf.S. */
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
new file mode 100644
index 000000000..887701259
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnan.S
@@ -0,0 +1,69 @@
+/* isnan(). PowerPC64/POWER7 version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* int __isnan(x) */
+ .section ".toc","aw"
+.LC0: /* 1.0 */
+ .tc FD_ONE[TC],0x3ff0000000000000
+ .section ".text"
+ .type __isnan, @function
+ .machine power7
+EALIGN (__isnan, 4, 0)
+ CALL_MCOUNT 0
+ lfd fp0,.LC0@toc(r2)
+ ftdiv cr7,fp1,fp0
+ li r3,0
+ bflr 30 /* If not NaN, finish. */
+
+ stfd fp1,-16(r1) /* Transfer FP to GPR's. */
+ ori 2,2,0 /* Force a new dispatch group. */
+ ld r4,-16(r1) /* Load FP into GPR. */
+ lis r0,0x7ff0
+ sldi r0,r0,32 /* const long r0 0x7ff00000 00000000. */
+ clrldi r4,r4,1 /* x = fabs(x) */
+ cmpd cr7,r4,r0 /* if (fabs(x) <= inf) */
+ blelr cr7 /* LE means not NaN. */
+ li r3,1 /* else return 1 */
+ blr
+ END (__isnan)
+
+hidden_def (__isnan)
+weak_alias (__isnan, isnan)
+
+/* It turns out that the 'double' version will also always work for
+ single-precision. */
+strong_alias (__isnan, __isnanf)
+hidden_def (__isnanf)
+weak_alias (__isnanf, isnanf)
+
+#ifdef NO_LONG_DOUBLE
+strong_alias (__isnan, __isnanl)
+weak_alias (__isnan, isnanl)
+#endif
+
+#ifndef IS_IN_libm
+# if LONG_DOUBLE_COMPAT(libc, GLIBC_2_0)
+compat_symbol (libc, __isnan, __isnanl, GLIBC_2_0);
+compat_symbol (libc, isnan, isnanl, GLIBC_2_0);
+# endif
+#endif
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S
new file mode 100644
index 000000000..b48c85e0d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isnanf.S
@@ -0,0 +1 @@
+/* This function uses the same code as s_isnan.S. */
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
new file mode 100644
index 000000000..2e5beed15
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -0,0 +1,449 @@
+/* Optimized memcpy implementation for PowerPC64/POWER7.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Luis Machado <luisgpm@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA
+ 02110-1301 USA. */
+
+#include <sysdep.h>
+#include <bp-sym.h>
+#include <bp-asm.h>
+
+
+/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
+ Returns 'dst'. */
+
+ .machine power7
+EALIGN (BP_SYM (memcpy), 5, 0)
+ CALL_MCOUNT 3
+
+ cmpldi cr1,5,31
+ neg 0,3
+ std 3,-16(1)
+ std 31,-8(1)
+ cfi_offset(31,-8)
+ ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
+ code. */
+
+ andi. 11,3,7 /* Check alignment of DST. */
+
+
+ clrldi 10,4,61 /* Check alignment of SRC. */
+ cmpld cr6,10,11 /* SRC and DST alignments match? */
+ mr 12,4
+ mr 31,5
+ bne cr6,L(copy_GE_32_unaligned)
+
+ srdi 9,5,3 /* Number of full quadwords remaining. */
+
+ beq L(copy_GE_32_aligned_cont)
+
+ clrldi 0,0,61
+ mtcrf 0x01,0
+ subf 31,0,5
+
+ /* Get the SRC aligned to 8 bytes. */
+
+1: bf 31,2f
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: bf 30,4f
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+4: bf 29,0f
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+0:
+ clrldi 10,12,61 /* Check alignment of SRC again. */
+ srdi 9,31,3 /* Number of full doublewords remaining. */
+
+L(copy_GE_32_aligned_cont):
+
+ clrldi 11,31,61
+ mtcrf 0x01,9
+
+ srdi 8,31,5
+ cmpldi cr1,9,4
+ cmpldi cr6,11,0
+ mr 11,12
+
+ /* Copy 1~3 doublewords so the main loop starts
+ at a multiple of 32 bytes. */
+
+ bf 30,1f
+ ld 6,0(12)
+ ld 7,8(12)
+ addi 11,12,16
+ mtctr 8
+ std 6,0(3)
+ std 7,8(3)
+ addi 10,3,16
+ bf 31,4f
+ ld 0,16(12)
+ std 0,16(3)
+ blt cr1,3f
+ addi 11,12,24
+ addi 10,3,24
+ b 4f
+
+ .align 4
+1: /* Copy 1 doubleword and set the counter. */
+ mr 10,3
+ mtctr 8
+ bf 31,4f
+ ld 6,0(12)
+ addi 11,12,8
+ std 6,0(3)
+ addi 10,3,8
+
+ /* Main aligned copy loop. Copies 32-bytes at a time. */
+ .align 4
+4:
+ ld 6,0(11)
+ ld 7,8(11)
+ ld 8,16(11)
+ ld 0,24(11)
+ addi 11,11,32
+
+ std 6,0(10)
+ std 7,8(10)
+ std 8,16(10)
+ std 0,24(10)
+ addi 10,10,32
+ bdnz 4b
+3:
+
+ /* Check for tail bytes. */
+ rldicr 0,31,0,60
+ mtcrf 0x01,31
+ beq cr6,0f
+
+.L9:
+ add 3,3,0
+ add 12,12,0
+
+ /* At this point we have a tail of 0-7 bytes and we know that the
+ destination is doubleword-aligned. */
+4: /* Copy 4 bytes. */
+ bf 29,2f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Copy 2 bytes. */
+ bf 30,1f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,0(12)
+ stb 6,0(3)
+0: /* Return original DST pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+
+ /* Handle copies of 0~31 bytes. */
+ .align 4
+L(copy_LT_32):
+ cmpldi cr6,5,8
+ mr 12,4
+ mtcrf 0x01,5
+ ble cr6,L(copy_LE_8)
+
+ /* At least 9 bytes to go. */
+ neg 8,4
+ clrrdi 11,4,2
+ andi. 0,8,3
+ cmpldi cr1,5,16
+ mr 10,5
+ beq L(copy_LT_32_aligned)
+
+ /* Force 4-bytes alignment for SRC. */
+ mtocrf 0x01,0
+ subf 10,0,5
+2: bf 30,1f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: bf 31,L(end_4bytes_alignment)
+
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+
+ .align 4
+L(end_4bytes_alignment):
+ cmpldi cr1,10,16
+ mtcrf 0x01,10
+
+L(copy_LT_32_aligned):
+ /* At least 6 bytes to go, and SRC is word-aligned. */
+ blt cr1,8f
+
+ /* Copy 16 bytes. */
+ lwz 6,0(12)
+ lwz 7,4(12)
+ stw 6,0(3)
+ lwz 8,8(12)
+ stw 7,4(3)
+ lwz 6,12(12)
+ addi 12,12,16
+ stw 8,8(3)
+ stw 6,12(3)
+ addi 3,3,16
+8: /* Copy 8 bytes. */
+ bf 28,4f
+
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Copy 4 bytes. */
+ bf 29,2f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Copy 2-3 bytes. */
+ bf 30,1f
+
+ lhz 6,0(12)
+ sth 6,0(3)
+ bf 31,0f
+ lbz 7,2(12)
+ stb 7,2(3)
+ ld 3,-16(1)
+ blr
+
+ .align 4
+1: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,0(12)
+ stb 6,0(3)
+0: /* Return original DST pointer. */
+ ld 3,-16(1)
+ blr
+
+ /* Handles copies of 0~8 bytes. */
+ .align 4
+L(copy_LE_8):
+ bne cr6,4f
+
+ /* Though we could've used ld/std here, they are still
+ slow for unaligned cases. */
+
+ lwz 6,0(4)
+ lwz 7,4(4)
+ stw 6,0(3)
+ stw 7,4(3)
+ ld 3,-16(1) /* Return original DST pointers. */
+ blr
+
+ .align 4
+4: /* Copies 4~7 bytes. */
+ bf 29,2b
+
+ lwz 6,0(4)
+ stw 6,0(3)
+ bf 30,5f
+ lhz 7,4(4)
+ sth 7,4(3)
+ bf 31,0f
+ lbz 8,6(4)
+ stb 8,6(3)
+ ld 3,-16(1)
+ blr
+
+ .align 4
+5: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,4(4)
+ stb 6,4(3)
+
+0: /* Return original DST pointer. */
+ ld 3,-16(1)
+ blr
+
+ /* Handle copies of 32+ bytes where DST is aligned (to quadword) but
+ SRC is not. Use aligned quadword loads from SRC, shifted to realign
+ the data, allowing for aligned DST stores. */
+ .align 4
+L(copy_GE_32_unaligned):
+ clrldi 0,0,60 /* Number of bytes until the 1st
+ quadword. */
+ andi. 11,3,15 /* Check alignment of DST (against
+ quadwords). */
+ srdi 9,5,4 /* Number of full quadwords remaining. */
+
+ beq L(copy_GE_32_unaligned_cont)
+
+ /* SRC is not quadword aligned, get it aligned. */
+
+ mtcrf 0x01,0
+ subf 31,0,5
+
+ /* Vector instructions work best when proper alignment (16-bytes)
+ is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
+1: /* Copy 1 byte. */
+ bf 31,2f
+
+ lbz 6,0(12)
+ addi 12,12,1
+ stb 6,0(3)
+ addi 3,3,1
+2: /* Copy 2 bytes. */
+ bf 30,4f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+4: /* Copy 4 bytes. */
+ bf 29,8f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+8: /* Copy 8 bytes. */
+ bf 28,0f
+
+ ld 6,0(12)
+ addi 12,12,8
+ std 6,0(3)
+ addi 3,3,8
+0:
+ clrldi 10,12,60 /* Check alignment of SRC. */
+ srdi 9,31,4 /* Number of full quadwords remaining. */
+
+ /* The proper alignment is present, it is OK to copy the bytes now. */
+L(copy_GE_32_unaligned_cont):
+
+ /* Setup two indexes to speed up the indexed vector operations. */
+ clrldi 11,31,60
+ li 6,16 /* Index for 16-bytes offsets. */
+ li 7,32 /* Index for 32-bytes offsets. */
+ cmpldi cr1,11,0
+ srdi 8,31,5 /* Setup the loop counter. */
+ mr 10,3
+ mr 11,12
+ mtcrf 0x01,9
+ cmpldi cr6,9,1
+ lvsl 5,0,12
+ lvx 3,0,12
+ bf 31,L(setup_unaligned_loop)
+
+ /* Copy another 16 bytes to align to 32-bytes due to the loop . */
+ lvx 4,12,6
+ vperm 6,3,4,5
+ addi 11,12,16
+ addi 10,3,16
+ stvx 6,0,3
+ vor 3,4,4
+
+L(setup_unaligned_loop):
+ mtctr 8
+ ble cr6,L(end_unaligned_loop)
+
+ /* Copy 32 bytes at a time using vector instructions. */
+ .align 4
+L(unaligned_loop):
+
+ /* Note: vr6/vr10 may contain data that was already copied,
+ but in order to get proper alignment, we may have to copy
+ some portions again. This is faster than having unaligned
+ vector instructions though. */
+
+ lvx 4,11,6 /* vr4 = r11+16. */
+ vperm 6,3,4,5 /* Merge the correctly-aligned portions
+ of vr3/vr4 into vr6. */
+ lvx 3,11,7 /* vr3 = r11+32. */
+ vperm 10,4,3,5 /* Merge the correctly-aligned portions
+ of vr3/vr4 into vr10. */
+ addi 11,11,32
+ stvx 6,0,10
+ stvx 10,10,6
+ addi 10,10,32
+
+ bdnz L(unaligned_loop)
+
+ .align 4
+L(end_unaligned_loop):
+
+ /* Check for tail bytes. */
+ rldicr 0,31,0,59
+ mtcrf 0x01,31
+ beq cr1,0f
+
+ add 3,3,0
+ add 12,12,0
+
+ /* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
+8: /* Copy 8 bytes. */
+ bf 28,4f
+
+ lwz 6,0(12)
+ lwz 7,4(12)
+ addi 12,12,8
+ stw 6,0(3)
+ stw 7,4(3)
+ addi 3,3,8
+4: /* Copy 4 bytes. */
+ bf 29,2f
+
+ lwz 6,0(12)
+ addi 12,12,4
+ stw 6,0(3)
+ addi 3,3,4
+2: /* Copy 2~3 bytes. */
+ bf 30,1f
+
+ lhz 6,0(12)
+ addi 12,12,2
+ sth 6,0(3)
+ addi 3,3,2
+1: /* Copy 1 byte. */
+ bf 31,0f
+
+ lbz 6,0(12)
+ stb 6,0(3)
+0: /* Return original DST pointer. */
+ ld 31,-8(1)
+ ld 3,-16(1)
+ blr
+
+END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
+libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/s390/fpu/fegetenv.c b/libc/sysdeps/s390/fpu/fegetenv.c
index a244f2ca8..04da54c94 100644
--- a/libc/sysdeps/s390/fpu/fegetenv.c
+++ b/libc/sysdeps/s390/fpu/fegetenv.c
@@ -20,10 +20,6 @@
#include <fenv_libc.h>
#include <fpu_control.h>
-#include <stddef.h>
-#include <asm/ptrace.h>
-#include <sys/ptrace.h>
-#include <unistd.h>
int
fegetenv (fenv_t *envp)
@@ -33,3 +29,4 @@ fegetenv (fenv_t *envp)
/* Success. */
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/libc/sysdeps/s390/s390-64/utf16-utf32-z9.c b/libc/sysdeps/s390/s390-64/utf16-utf32-z9.c
index 868dea68c..14daf2118 100644
--- a/libc/sysdeps/s390/s390-64/utf16-utf32-z9.c
+++ b/libc/sysdeps/s390/s390-64/utf16-utf32-z9.c
@@ -203,7 +203,10 @@ gconv_end (struct __gconv_step *data)
swapping). */
#define BODY \
{ \
- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ /* The hardware instruction currently fails to report an error for \
+ isolated low surrogates so we have to disable the instruction \
+ until this gets resolved. */ \
+ if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
{ \
HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
if (inptr != inend) \
@@ -229,6 +232,12 @@ gconv_end (struct __gconv_step *data)
} \
else \
{ \
+ /* An isolated low-surrogate was found. This has to be \
+ considered ill-formed. */ \
+ if (__builtin_expect (u1 >= 0xdc00, 0)) \
+ { \
+ STANDARD_FROM_LOOP_ERR_HANDLER (2); \
+ } \
/* It's a surrogate character. At least the first word says \
it is. */ \
if (__builtin_expect (inptr + 4 > inend, 0)) \
diff --git a/libc/sysdeps/s390/s390-64/utf8-utf16-z9.c b/libc/sysdeps/s390/s390-64/utf8-utf16-z9.c
index 531d3ebd4..5f73f3c59 100644
--- a/libc/sysdeps/s390/s390-64/utf8-utf16-z9.c
+++ b/libc/sysdeps/s390/s390-64/utf8-utf16-z9.c
@@ -345,9 +345,12 @@ gconv_end (struct __gconv_step *data)
Operation. */
#define BODY \
{ \
- if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
+ /* The hardware instruction currently fails to report an error for \
+ isolated low surrogates so we have to disable the instruction \
+ until this gets resolved. */ \
+ if (0) /* (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) */ \
{ \
- HARDWARE_CONVERT ("cu21 %0, %1"); \
+ HARDWARE_CONVERT ("cu21 %0, %1, 1"); \
if (inptr != inend) \
{ \
/* Check if the third byte is \
@@ -388,7 +391,7 @@ gconv_end (struct __gconv_step *data)
\
outptr += 2; \
} \
- else if (c >= 0x0800 && c <= 0xd7ff) \
+ else if ((c >= 0x0800 && c <= 0xd7ff) || c > 0xdfff) \
{ \
/* Three byte UTF-8 char. */ \
\
diff --git a/libc/sysdeps/sh/sh4/fpu/fegetenv.c b/libc/sysdeps/sh/sh4/fpu/fegetenv.c
index c07b32af3..683939b52 100644
--- a/libc/sysdeps/sh/sh4/fpu/fegetenv.c
+++ b/libc/sysdeps/sh/sh4/fpu/fegetenv.c
@@ -29,3 +29,4 @@ fegetenv (fenv_t *envp)
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/libc/sysdeps/sparc/Makefile b/libc/sysdeps/sparc/Makefile
index 73b926554..735e4a40d 100644
--- a/libc/sysdeps/sparc/Makefile
+++ b/libc/sysdeps/sparc/Makefile
@@ -10,3 +10,8 @@ endif
ifeq ($(subdir),db2)
CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_SPARC_GCC=1
endif
+
+ifeq ($(subdir),csu)
+# get offset to rtld_global._dl_hwcap
+gen-as-const-headers += rtld-global-offsets.sym
+endif
diff --git a/libc/sysdeps/sparc/elf/rtld-global-offsets.sym b/libc/sysdeps/sparc/elf/rtld-global-offsets.sym
new file mode 100644
index 000000000..ff4e97f2a
--- /dev/null
+++ b/libc/sysdeps/sparc/elf/rtld-global-offsets.sym
@@ -0,0 +1,7 @@
+#define SHARED 1
+
+#include <ldsodefs.h>
+
+#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem)
+
+RTLD_GLOBAL_RO_DL_HWCAP_OFFSET rtld_global_ro_offsetof (_dl_hwcap)
diff --git a/libc/sysdeps/sparc/fpu/fegetenv.c b/libc/sysdeps/sparc/fpu/fegetenv.c
index 36486f597..c606a9cac 100644
--- a/libc/sysdeps/sparc/fpu/fegetenv.c
+++ b/libc/sysdeps/sparc/fpu/fegetenv.c
@@ -34,4 +34,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/sparc/sparc32/bcopy.c b/libc/sysdeps/sparc/sparc32/bcopy.c
deleted file mode 100644
index 9a455f33c..000000000
--- a/libc/sysdeps/sparc/sparc32/bcopy.c
+++ /dev/null
@@ -1 +0,0 @@
-/* bcopy is in memcpy.S */
diff --git a/libc/sysdeps/sparc/sparc32/dl-irel.h b/libc/sysdeps/sparc/sparc32/dl-irel.h
new file mode 100644
index 000000000..1891938d6
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/dl-irel.h
@@ -0,0 +1,55 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+ SPARC 32-bit version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <dl-plt.h>
+
+#define ELF_MACHINE_IRELA 1
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const Elf32_Rela *reloc)
+{
+ unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+
+ if (__builtin_expect (r_type == R_SPARC_IRELATIVE, 1))
+ {
+ Elf32_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf32_Addr value = ((Elf32_Addr (*) (void)) reloc->r_addend) ();
+ *reloc_addr = value;
+ }
+ else if (__builtin_expect (r_type == R_SPARC_JMP_IREL, 1))
+ {
+ Elf32_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf32_Addr value = ((Elf32_Addr (*) (void)) reloc->r_addend) ();
+
+ sparc_fixup_plt (reloc, reloc_addr, value, 0, 1);
+ }
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ __libc_fatal ("unexpected reloc type in static binary");
+}
+
+#endif /* dl-irel.h */
diff --git a/libc/sysdeps/sparc/sparc32/dl-machine.h b/libc/sysdeps/sparc/sparc32/dl-machine.h
index b3b7852d8..9631db32e 100644
--- a/libc/sysdeps/sparc/sparc32/dl-machine.h
+++ b/libc/sysdeps/sparc/sparc32/dl-machine.h
@@ -1,5 +1,5 @@
/* Machine-dependent ELF dynamic relocation inline functions. SPARC version.
- Copyright (C) 1996-2003, 2004, 2005, 2006, 2007
+ Copyright (C) 1996-2003, 2004, 2005, 2006, 2007, 2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -27,20 +27,13 @@
#include <sys/param.h>
#include <ldsodefs.h>
#include <tls.h>
+#include <dl-plt.h>
#ifndef VALIDX
# define VALIDX(tag) (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM \
+ DT_EXTRANUM + DT_VALTAGIDX (tag))
#endif
-/* Some SPARC opcodes we need to use for self-modifying code. */
-#define OPCODE_NOP 0x01000000 /* nop */
-#define OPCODE_CALL 0x40000000 /* call ?; add PC-rel word address */
-#define OPCODE_SETHI_G1 0x03000000 /* sethi ?, %g1; add value>>10 */
-#define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
-#define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */
-#define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */
-
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
elf_machine_matches_host (const Elf32_Ehdr *ehdr)
@@ -173,15 +166,19 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
in .rela.plt. */
while (rela < relaend)
{
- *(unsigned int *) rela->r_offset
- = OPCODE_SETHI_G1 | (rela->r_offset - (Elf32_Addr) plt);
- *(unsigned int *) (rela->r_offset + 4)
+ *(unsigned int *) (rela->r_offset + l->l_addr)
+ = OPCODE_SETHI_G1 | (rela->r_offset + l->l_addr
+ - (Elf32_Addr) plt);
+ *(unsigned int *) (rela->r_offset + l->l_addr + 4)
= OPCODE_BA | ((((Elf32_Addr) plt
- - rela->r_offset - 4) >> 2) & 0x3fffff);
+ - rela->r_offset - l->l_addr - 4) >> 2)
+ & 0x3fffff);
if (do_flush)
{
- __asm __volatile ("flush %0" : : "r"(rela->r_offset));
- __asm __volatile ("flush %0+4" : : "r"(rela->r_offset));
+ __asm __volatile ("flush %0" : : "r" (rela->r_offset
+ + l->l_addr));
+ __asm __volatile ("flush %0+4" : : "r" (rela->r_offset
+ + l->l_addr));
}
++rela;
}
@@ -312,41 +309,6 @@ _dl_start_user:\n\
.size _dl_start_user, . - _dl_start_user\n\
.previous");
-static inline __attribute__ ((always_inline)) Elf32_Addr
-sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr,
- Elf32_Addr value, int t, int do_flush)
-{
- Elf32_Sword disp = value - (Elf32_Addr) reloc_addr;
-
- if (0 && disp >= -0x800000 && disp < 0x800000)
- {
- /* Don't need to worry about thread safety. We're writing just one
- instruction. */
-
- reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff);
- if (do_flush)
- __asm __volatile ("flush %0" : : "r"(reloc_addr));
- }
- else
- {
- /* For thread safety, write the instructions from the bottom and
- flush before we overwrite the critical "b,a". This of course
- need not be done during bootstrapping, since there are no threads.
- But we also can't tell if we _can_ use flush, so don't. */
-
- reloc_addr += t;
- reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff);
- if (do_flush)
- __asm __volatile ("flush %0+4" : : "r"(reloc_addr));
-
- reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10);
- if (do_flush)
- __asm __volatile ("flush %0" : : "r"(reloc_addr));
- }
-
- return value;
-}
-
static inline Elf32_Addr
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
const Elf32_Rela *reloc,
@@ -433,6 +395,13 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
value += reloc->r_addend; /* Assume copy relocs have zero addend. */
+ if (sym != NULL
+ && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)
+ && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1))
+ {
+ value = ((Elf32_Addr (*) (void)) value) ();
+ }
+
switch (r_type)
{
#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP
@@ -460,6 +429,13 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
case R_SPARC_32:
*reloc_addr = value;
break;
+ case R_SPARC_IRELATIVE:
+ value = ((Elf32_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ break;
+ case R_SPARC_JMP_IREL:
+ value = ((Elf32_Addr (*) (void)) value) ();
+ /* Fall thru */
case R_SPARC_JMP_SLOT:
{
#if !defined RTLD_BOOTSTRAP && !defined __sparc_v9__
@@ -578,16 +554,21 @@ __attribute__ ((always_inline))
elf_machine_lazy_rel (struct link_map *map,
Elf32_Addr l_addr, const Elf32_Rela *reloc)
{
- switch (ELF32_R_TYPE (reloc->r_info))
+ Elf32_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+ const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
+
+ if (__builtin_expect (r_type == R_SPARC_JMP_SLOT, 1))
+ ;
+ else if (r_type == R_SPARC_JMP_IREL)
{
- case R_SPARC_NONE:
- break;
- case R_SPARC_JMP_SLOT:
- break;
- default:
- _dl_reloc_bad_type (map, ELFW(R_TYPE) (reloc->r_info), 1);
- break;
+ Elf32_Addr value = map->l_addr + reloc->r_addend;
+ value = ((Elf32_Addr (*) (void)) value) ();
+ sparc_fixup_plt (reloc, reloc_addr, value, 1, 1);
}
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ _dl_reloc_bad_type (map, r_type, 1);
}
#endif /* RESOLVE_MAP */
diff --git a/libc/sysdeps/sparc/sparc32/dl-plt.h b/libc/sysdeps/sparc/sparc32/dl-plt.h
new file mode 100644
index 000000000..bfb891fe6
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/dl-plt.h
@@ -0,0 +1,97 @@
+/* PLT fixups. Sparc 32-bit version.
+ Copyright (C) 1996-2003, 2004, 2005, 2006, 2007, 2010
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Some SPARC opcodes we need to use for self-modifying code. */
+#define OPCODE_NOP 0x01000000 /* nop */
+#define OPCODE_CALL 0x40000000 /* call ?; add PC-rel word address */
+#define OPCODE_SETHI_G1 0x03000000 /* sethi ?, %g1; add value>>10 */
+#define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
+#define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */
+#define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */
+#define OPCODE_BA_PT 0x30480000 /* ba,a,pt %icc, ?; add PC-rel word address */
+
+static inline __attribute__ ((always_inline)) Elf32_Addr
+sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr,
+ Elf32_Addr value, int t, int do_flush)
+{
+ Elf32_Sword disp;
+
+ /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap,
+ in which case we'll be resolving all PLT entries and thus can
+ optimize by overwriting instructions starting at the first PLT entry
+ instruction and we need not be mindful of thread safety.
+
+ Otherwise, 't' is '1'. */
+ reloc_addr += t;
+ disp = value - (Elf32_Addr) reloc_addr;
+
+ if (disp >= -0x800000 && disp < 0x800000)
+ {
+ unsigned int insn = OPCODE_BA | ((disp >> 2) & 0x3fffff);
+
+#ifdef __sparc_v9__
+ /* On V9 we can do even better by using a branch with
+ prediction if we fit into the even smaller 19-bit
+ displacement field. */
+ if (disp >= -0x100000 && disp < 0x100000)
+ insn = OPCODE_BA_PT | ((disp >> 2) & 0x07ffff);
+#endif
+
+ /* Even if we are writing just a single branch, we must not
+ ignore the 't' offset. Consider a case where we have some
+ PLT slots which can be optimized into a single branch and
+ some which cannot. Then we can end up with a PLT which looks
+ like:
+
+ PLT4.0: sethi %(PLT_4_INDEX), %g1
+ sethi %(fully_resolved_sym_4), %g1
+ jmp %g1 + %lo(fully_resolved_sym_4)
+ PLT5.0: ba,a fully_resolved_sym_5
+ ba,a PLT0.0
+ ...
+
+ The delay slot of that jmp must always be either a sethi to
+ %g1 or a nop. But if we try to place this displacement
+ branch there, PLT4.0 will jump to fully_resolved_sym_4 for 1
+ instruction and then go immediately to
+ fully_resolved_sym_5. */
+
+ reloc_addr[0] = insn;
+ if (do_flush)
+ __asm __volatile ("flush %0" : : "r"(reloc_addr));
+ }
+ else
+ {
+ /* For thread safety, write the instructions from the bottom and
+ flush before we overwrite the critical "b,a". This of course
+ need not be done during bootstrapping, since there are no threads.
+ But we also can't tell if we _can_ use flush, so don't. */
+
+ reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff);
+ if (do_flush)
+ __asm __volatile ("flush %0+4" : : "r"(reloc_addr));
+
+ reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10);
+ if (do_flush)
+ __asm __volatile ("flush %0" : : "r"(reloc_addr));
+ }
+
+ return value;
+}
diff --git a/libc/sysdeps/sparc/sparc32/memcpy.S b/libc/sysdeps/sparc/sparc32/memcpy.S
index 6bd55c06a..748a0862f 100644
--- a/libc/sysdeps/sparc/sparc32/memcpy.S
+++ b/libc/sysdeps/sparc/sparc32/memcpy.S
@@ -68,45 +68,6 @@
stb %t0, [%dst - offset - 0x02]; \
stb %t1, [%dst - offset - 0x01];
-/* Both these macros have to start with exactly the same insn */
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [%src - offset - 0x20], %t0; \
- ldd [%src - offset - 0x18], %t2; \
- ldd [%src - offset - 0x10], %t4; \
- ldd [%src - offset - 0x08], %t6; \
- st %t0, [%dst - offset - 0x20]; \
- st %t1, [%dst - offset - 0x1c]; \
- st %t2, [%dst - offset - 0x18]; \
- st %t3, [%dst - offset - 0x14]; \
- st %t4, [%dst - offset - 0x10]; \
- st %t5, [%dst - offset - 0x0c]; \
- st %t6, [%dst - offset - 0x08]; \
- st %t7, [%dst - offset - 0x04];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [%src - offset - 0x20], %t0; \
- ldd [%src - offset - 0x18], %t2; \
- ldd [%src - offset - 0x10], %t4; \
- ldd [%src - offset - 0x08], %t6; \
- std %t0, [%dst - offset - 0x20]; \
- std %t2, [%dst - offset - 0x18]; \
- std %t4, [%dst - offset - 0x10]; \
- std %t6, [%dst - offset - 0x08];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldd [%src + offset + 0x00], %t0; \
- ldd [%src + offset + 0x08], %t2; \
- st %t0, [%dst + offset + 0x00]; \
- st %t1, [%dst + offset + 0x04]; \
- st %t2, [%dst + offset + 0x08]; \
- st %t3, [%dst + offset + 0x0c];
-
-#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
- ldub [%src + offset + 0x00], %t0; \
- ldub [%src + offset + 0x01], %t1; \
- stb %t0, [%dst + offset + 0x00]; \
- stb %t1, [%dst + offset + 0x01];
-
#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
ldd [%src + offset + 0x00], %t0; \
ldd [%src + offset + 0x08], %t2; \
@@ -146,295 +107,20 @@
.text
.align 4
-ENTRY(bcopy)
- mov %o0, %o3
- mov %o1, %o0
- mov %o3, %o1
-END(bcopy)
-
-ENTRY(memmove)
- cmp %o0, %o1
+ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
+ sub %o0, %o1, %o4
st %o0, [%sp + 64]
- bleu 9f
- sub %o0, %o1, %o4
-
- add %o1, %o2, %o3
- cmp %o3, %o0
- bleu 0f
- andcc %o4, 3, %o5
-
- add %o1, %o2, %o1
- add %o0, %o2, %o0
- bne 77f
+9: andcc %o4, 3, %o5
+0: bne 86f
cmp %o2, 15
- bleu 91f
- andcc %o1, 3, %g0
- be 3f
- nop
-
- andcc %o1, 1, %g0
- be 4f
- andcc %o1, 2, %g0
-
- ldub [%o1 - 1], %g2
- sub %o1, 1, %o1
- stb %g2, [%o0 - 1]
- sub %o2, 1, %o2
- be 3f
- sub %o0, 1, %o0
-4: lduh [%o1 - 2], %g2
- sub %o1, 2, %o1
- sth %g2, [%o0 - 2]
- sub %o2, 2, %o2
- sub %o0, 2, %o0
-
-3: andcc %o1, 4, %g0
-
- be 2f
- mov %o2, %g1
-
- ld [%o1 - 4], %o4
- sub %g1, 4, %g1
- st %o4, [%o0 - 4]
- sub %o1, 4, %o1
- sub %o0, 4, %o0
-2: andcc %g1, 0xffffff80, %g6
- be 3f
- andcc %o0, 4, %g0
-
- be 74f + 4
-5: RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
- RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
- RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
- RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
- subcc %g6, 128, %g6
- sub %o1, 128, %o1
- bne 5b
- sub %o0, 128, %o0
-
-3: andcc %g1, 0x70, %g6
- be 72f
- andcc %g1, 8, %g0
-
- srl %g6, 1, %o4
- mov %o7, %g2
- add %g6, %o4, %o4
-101: call 100f
- sub %o1, %g6, %o1
- mov %g2, %o7
- jmpl %o5 + (72f - 101b), %g0
- sub %o0, %g6, %o0
-
-71: RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
- RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
- RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
- RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
- RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
- RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
- RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
-72: be 73f
- andcc %g1, 4, %g0
-
- ldd [%o1 - 0x08], %g2
- sub %o0, 8, %o0
- sub %o1, 8, %o1
- st %g2, [%o0]
- st %g3, [%o0 + 0x04]
-73: be 1f
- andcc %g1, 2, %g0
-
- ld [%o1 - 4], %g2
- sub %o1, 4, %o1
- st %g2, [%o0 - 4]
- sub %o0, 4, %o0
-1: be 1f
- andcc %g1, 1, %g0
-
- lduh [%o1 - 2], %g2
- sub %o1, 2, %o1
- sth %g2, [%o0 - 2]
- sub %o0, 2, %o0
-1: be 1f
- nop
-
- ldub [%o1 - 1], %g2
- stb %g2, [%o0 - 1]
-1: retl
- ld [%sp + 64], %o0
-
-74: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
- subcc %g6, 128, %g6
- sub %o1, 128, %o1
- bne 74b
- sub %o0, 128, %o0
-
- andcc %g1, 0x70, %g6
- be 72b
- andcc %g1, 8, %g0
-
- srl %g6, 1, %o4
- mov %o7, %g2
- add %g6, %o4, %o4
-102: call 100f
- sub %o1, %g6, %o1
- mov %g2, %o7
- jmpl %o5 + (72b - 102b), %g0
- sub %o0, %g6, %o0
-
-75: and %o2, 0xe, %o3
- mov %o7, %g2
- sll %o3, 3, %o4
- sub %o0, %o3, %o0
-103: call 100f
- sub %o1, %o3, %o1
- mov %g2, %o7
- jmpl %o5 + (76f - 103b), %g0
- andcc %o2, 1, %g0
-
- RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
- RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
- RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
- RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
- RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
- RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
- RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
-
-76: be 1f
- nop
- ldub [%o1 - 1], %g2
- stb %g2, [%o0 - 1]
-1: retl
- ld [%sp + 64], %o0
-91: bne 75b
- andcc %o2, 8, %g0
-
- be 1f
- andcc %o2, 4, %g0
-
- ld [%o1 - 0x08], %g2
- ld [%o1 - 0x04], %g3
- sub %o1, 8, %o1
- st %g2, [%o0 - 0x08]
- st %g3, [%o0 - 0x04]
- sub %o0, 8, %o0
-1: b 73b
- mov %o2, %g1
-
-77: cmp %o2, 15
- bleu 75b
- andcc %o0, 3, %g0
- be 64f
- andcc %o0, 1, %g0
- be 63f
- andcc %o0, 2, %g0
- ldub [%o1 - 1], %g5
- sub %o1, 1, %o1
- stb %g5, [%o0 - 1]
- sub %o0, 1, %o0
- be 64f
- sub %o2, 1, %o2
-
-63: ldub [%o1 - 1], %g5
- sub %o1, 2, %o1
- stb %g5, [%o0 - 1]
- sub %o0, 2, %o0
- ldub [%o1], %g5
- sub %o2, 2, %o2
- stb %g5, [%o0]
-64: and %o1, 3, %g2
- and %o1, -4, %o1
- and %o2, 0xc, %g3
- add %o1, 4, %o1
- cmp %g3, 4
- sll %g2, 3, %g4
- mov 32, %g2
- be 4f
- sub %g2, %g4, %g6
-
- blu 3f
- cmp %g3, 8
-
- be 2f
- srl %o2, 2, %g3
-
- ld [%o1 - 4], %o3
- add %o0, -8, %o0
- ld [%o1 - 8], %o4
- add %o1, -16, %o1
- b 7f
- add %g3, 1, %g3
-2: ld [%o1 - 4], %o4
- add %o0, -4, %o0
- ld [%o1 - 8], %g1
- add %o1, -12, %o1
- b 8f
- add %g3, 2, %g3
-3: ld [%o1 - 4], %o5
- add %o0, -12, %o0
- ld [%o1 - 8], %o3
- add %o1, -20, %o1
- b 6f
- srl %o2, 2, %g3
-4: ld [%o1 - 4], %g1
- srl %o2, 2, %g3
- ld [%o1 - 8], %o5
- add %o1, -24, %o1
- add %o0, -16, %o0
- add %g3, -1, %g3
+ bleu 90f
+ andcc %o1, 3, %g0
- ld [%o1 + 12], %o3
-5: sll %o5, %g4, %g2
- srl %g1, %g6, %g5
- or %g2, %g5, %g2
- st %g2, [%o0 + 12]
-6: ld [%o1 + 8], %o4
- sll %o3, %g4, %g2
- srl %o5, %g6, %g5
- or %g2, %g5, %g2
- st %g2, [%o0 + 8]
-7: ld [%o1 + 4], %g1
- sll %o4, %g4, %g2
- srl %o3, %g6, %g5
- or %g2, %g5, %g2
- st %g2, [%o0 + 4]
-8: ld [%o1], %o5
- sll %g1, %g4, %g2
- srl %o4, %g6, %g5
- addcc %g3, -4, %g3
- or %g2, %g5, %g2
- add %o1, -16, %o1
- st %g2, [%o0]
- add %o0, -16, %o0
- bne,a 5b
- ld [%o1 + 12], %o3
- sll %o5, %g4, %g2
- srl %g1, %g6, %g5
- srl %g4, 3, %g3
- or %g2, %g5, %g2
- add %o1, %g3, %o1
- andcc %o2, 2, %g0
- st %g2, [%o0 + 12]
- be 1f
- andcc %o2, 1, %g0
-
- ldub [%o1 + 15], %g5
- add %o1, -2, %o1
- stb %g5, [%o0 + 11]
- add %o0, -2, %o0
- ldub [%o1 + 16], %g5
- stb %g5, [%o0 + 12]
-1: be 1f
- nop
- ldub [%o1 + 15], %g5
- stb %g5, [%o0 + 11]
-1: retl
- ld [%sp + 64], %o0
+ be 78f
+ andcc %o1, 4, %g0
-78: andcc %o1, 1, %g0
+ andcc %o1, 1, %g0
be 4f
andcc %o1, 2, %g0
@@ -442,30 +128,16 @@ ENTRY(memmove)
add %o1, 1, %o1
stb %g2, [%o0]
sub %o2, 1, %o2
- bne 3f
+ bne 77f
add %o0, 1, %o0
4: lduh [%o1], %g2
add %o1, 2, %o1
sth %g2, [%o0]
sub %o2, 2, %o2
- b 3f
- add %o0, 2, %o0
-END(memmove)
-
-ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
- sub %o0, %o1, %o4
- st %o0, [%sp + 64]
-9: andcc %o4, 3, %o5
-0: bne 86f
- cmp %o2, 15
-
- bleu 90f
- andcc %o1, 3, %g0
-
- bne 78b
-3: andcc %o1, 4, %g0
+ add %o0, 2, %o0
- be 2f
+77: andcc %o1, 4, %g0
+78: be 2f
mov %o2, %g1
ld [%o1], %o4
@@ -968,5 +640,5 @@ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */
110: retl
sub %o7, %g6, %o5
END(memcpy)
+
libc_hidden_builtin_def (memcpy)
-libc_hidden_builtin_def (memmove)
diff --git a/libc/sysdeps/sparc/sparc32/memmove.c b/libc/sysdeps/sparc/sparc32/memmove.c
deleted file mode 100644
index a8d2d4994..000000000
--- a/libc/sysdeps/sparc/sparc32/memmove.c
+++ /dev/null
@@ -1 +0,0 @@
-/* memmove is in memcpy.S */
diff --git a/libc/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S b/libc/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S
deleted file mode 100644
index c3f097118..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S
+++ /dev/null
@@ -1,215 +0,0 @@
-! SPARC __udiv_qrnnd division support, used from longlong.h.
-
-! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-
-! INPUT PARAMETERS
-! rem_ptr o0
-! n1 o1
-! n0 o2
-! d o3
-
-#include "sysdep.h"
-
-ENTRY(__udiv_qrnnd)
- tst %o3
- bneg LOC(largedivisor)
- mov 8,%g1
-
- b LOC(p1)
- addxcc %o2,%o2,%o2
-
-LOC(plop):
- bcc LOC(n1)
- addxcc %o2,%o2,%o2
-LOC(p1):
- addx %o1,%o1,%o1
- subcc %o1,%o3,%o4
- bcc LOC(n2)
- addxcc %o2,%o2,%o2
-LOC(p2):
- addx %o1,%o1,%o1
- subcc %o1,%o3,%o4
- bcc LOC(n3)
- addxcc %o2,%o2,%o2
-LOC(p3):
- addx %o1,%o1,%o1
- subcc %o1,%o3,%o4
- bcc LOC(n4)
- addxcc %o2,%o2,%o2
-LOC(p4):
- addx %o1,%o1,%o1
- addcc %g1,-1,%g1
- bne LOC(plop)
- subcc %o1,%o3,%o4
- bcc LOC(n5)
- addxcc %o2,%o2,%o2
-LOC(p5):
- st %o1,[%o0]
- retl
- xnor %g0,%o2,%o0
-
-LOC(nlop):
- bcc LOC(p1)
- addxcc %o2,%o2,%o2
-LOC(n1):
- addx %o4,%o4,%o4
- subcc %o4,%o3,%o1
- bcc LOC(p2)
- addxcc %o2,%o2,%o2
-LOC(n2):
- addx %o4,%o4,%o4
- subcc %o4,%o3,%o1
- bcc LOC(p3)
- addxcc %o2,%o2,%o2
-LOC(n3):
- addx %o4,%o4,%o4
- subcc %o4,%o3,%o1
- bcc LOC(p4)
- addxcc %o2,%o2,%o2
-LOC(n4):
- addx %o4,%o4,%o4
- addcc %g1,-1,%g1
- bne LOC(nlop)
- subcc %o4,%o3,%o1
- bcc LOC(p5)
- addxcc %o2,%o2,%o2
-LOC(n5):
- st %o4,[%o0]
- retl
- xnor %g0,%o2,%o0
-
-LOC(largedivisor):
- and %o2,1,%o5 ! %o5 = n0 & 1
-
- srl %o2,1,%o2
- sll %o1,31,%g2
- or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1)
- srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1)
-
- and %o3,1,%g2
- srl %o3,1,%g3 ! %g3 = floor(d / 2)
- add %g3,%g2,%g3 ! %g3 = ceil(d / 2)
-
- b LOC(Lp1)
- addxcc %o2,%o2,%o2
-
-LOC(Lplop):
- bcc LOC(Ln1)
- addxcc %o2,%o2,%o2
-LOC(Lp1):
- addx %o1,%o1,%o1
- subcc %o1,%g3,%o4
- bcc LOC(Ln2)
- addxcc %o2,%o2,%o2
-LOC(Lp2):
- addx %o1,%o1,%o1
- subcc %o1,%g3,%o4
- bcc LOC(Ln3)
- addxcc %o2,%o2,%o2
-LOC(Lp3):
- addx %o1,%o1,%o1
- subcc %o1,%g3,%o4
- bcc LOC(Ln4)
- addxcc %o2,%o2,%o2
-LOC(Lp4):
- addx %o1,%o1,%o1
- addcc %g1,-1,%g1
- bne LOC(Lplop)
- subcc %o1,%g3,%o4
- bcc LOC(Ln5)
- addxcc %o2,%o2,%o2
-LOC(Lp5):
- add %o1,%o1,%o1 ! << 1
- tst %g2
- bne LOC(Oddp)
- add %o5,%o1,%o1
- st %o1,[%o0]
- retl
- xnor %g0,%o2,%o0
-
-LOC(Lnlop):
- bcc LOC(Lp1)
- addxcc %o2,%o2,%o2
-LOC(Ln1):
- addx %o4,%o4,%o4
- subcc %o4,%g3,%o1
- bcc LOC(Lp2)
- addxcc %o2,%o2,%o2
-LOC(Ln2):
- addx %o4,%o4,%o4
- subcc %o4,%g3,%o1
- bcc LOC(Lp3)
- addxcc %o2,%o2,%o2
-LOC(Ln3):
- addx %o4,%o4,%o4
- subcc %o4,%g3,%o1
- bcc LOC(Lp4)
- addxcc %o2,%o2,%o2
-LOC(Ln4):
- addx %o4,%o4,%o4
- addcc %g1,-1,%g1
- bne LOC(Lnlop)
- subcc %o4,%g3,%o1
- bcc LOC(Lp5)
- addxcc %o2,%o2,%o2
-LOC(Ln5):
- add %o4,%o4,%o4 ! << 1
- tst %g2
- bne LOC(Oddn)
- add %o5,%o4,%o4
- st %o4,[%o0]
- retl
- xnor %g0,%o2,%o0
-
-LOC(Oddp):
- xnor %g0,%o2,%o2
- ! q' in %o2. r' in %o1
- addcc %o1,%o2,%o1
- bcc LOC(Lp6)
- addx %o2,0,%o2
- sub %o1,%o3,%o1
-LOC(Lp6):
- subcc %o1,%o3,%g0
- bcs LOC(Lp7)
- subx %o2,-1,%o2
- sub %o1,%o3,%o1
-LOC(Lp7):
- st %o1,[%o0]
- retl
- mov %o2,%o0
-
-LOC(Oddn):
- xnor %g0,%o2,%o2
- ! q' in %o2. r' in %o4
- addcc %o4,%o2,%o4
- bcc LOC(Ln6)
- addx %o2,0,%o2
- sub %o4,%o3,%o4
-LOC(Ln6):
- subcc %o4,%o3,%g0
- bcs LOC(Ln7)
- subx %o2,-1,%o2
- sub %o4,%o3,%o4
-LOC(Ln7):
- st %o4,[%o0]
- retl
- mov %o2,%o0
-
-END(__udiv_qrnnd)
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/bcopy.c b/libc/sysdeps/sparc/sparc32/sparcv9/bcopy.c
deleted file mode 100644
index 9a455f33c..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv9/bcopy.c
+++ /dev/null
@@ -1 +0,0 @@
-/* bcopy is in memcpy.S */
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/memmove.c b/libc/sysdeps/sparc/sparc32/sparcv9/memmove.c
deleted file mode 100644
index a8d2d4994..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv9/memmove.c
+++ /dev/null
@@ -1 +0,0 @@
-/* memmove is in memcpy.S */
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
new file mode 100644
index 000000000..4d45042a9
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile
@@ -0,0 +1,4 @@
+ifeq ($(subdir),string)
+sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
+ memset-niagara1
+endif
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S
new file mode 100644
index 000000000..10aef85fe
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/multiarch/memcpy-niagara1.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S
new file mode 100644
index 000000000..6b1bf6ea7
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/multiarch/memcpy-niagara2.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S
new file mode 100644
index 000000000..77adf151a
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/multiarch/memcpy-ultra3.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S
new file mode 100644
index 000000000..14df91e00
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S
@@ -0,0 +1,4 @@
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/multiarch/memcpy.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S
new file mode 100644
index 000000000..b43242087
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S
@@ -0,0 +1,2 @@
+#define XCC icc
+#include <sparc64/multiarch/memset-niagara1.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S
new file mode 100644
index 000000000..8f8264337
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S
@@ -0,0 +1,4 @@
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define XCC icc
+#include <sparc64/multiarch/memset.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S b/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S
deleted file mode 100644
index 61960dce6..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define XCC icc
-#include <sparc64/sparcv9b/memcpy.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S b/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S
deleted file mode 100644
index 4c05f57bc..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define XCC icc
-#include <sparc64/sparcv9v/memcpy.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S b/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S
deleted file mode 100644
index 5e46c7489..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define XCC icc
-#include <sparc64/sparcv9v/memset.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S b/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S
deleted file mode 100644
index 7f4606037..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define XCC icc
-#include <sparc64/sparcv9v2/memcpy.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S b/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S
deleted file mode 100644
index 72de7bb0c..000000000
--- a/libc/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define XCC icc
-#include <sparc64/sparcv9v2/memset.S>
diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/strlen.S b/libc/sysdeps/sparc/sparc32/sparcv9/strlen.S
index b8f4dba4f..28a216c07 100644
--- a/libc/sysdeps/sparc/sparc32/sparcv9/strlen.S
+++ b/libc/sysdeps/sparc/sparc32/sparcv9/strlen.S
@@ -1,4 +1 @@
-#define ASI_PNF 0x82
-#define ASI_BLK_P 0xf0
-#define XCC icc
#include <sparc64/strlen.S>
diff --git a/libc/sysdeps/sparc/sparc32/strlen.S b/libc/sysdeps/sparc/sparc32/strlen.S
index ed92f20e2..2945bb548 100644
--- a/libc/sysdeps/sparc/sparc32/strlen.S
+++ b/libc/sysdeps/sparc/sparc32/strlen.S
@@ -1,8 +1,9 @@
/* Determine the length of a string.
For SPARC v7.
- Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1999, 2003, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz>.
+ Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
+ David S. Miller <davem@davemloft.net>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -21,86 +22,55 @@
#include <sysdep.h>
- /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x01010101) & (~xword) & 0x80808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
.text
.align 4
ENTRY(strlen)
- mov %o0, %o1
- andcc %o0, 3, %g0
- be 20f
- sethi %hi(0x80808080), %o4
-
- ldub [%o0], %o5
- cmp %o5, 0
- be 21f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 4f
- or %o4, %lo(0x80808080), %o3
- ldub [%o0], %o5
- cmp %o5, 0
- be 22f
- add %o0, 1, %o0
- andcc %o0, 3, %g0
- be 5f
- sethi %hi(0x01010101), %o4
- ldub [%o0], %o5
- cmp %o5, 0
- be 23f
- add %o0, 1, %o0
- b 11f
- or %o4, %lo(0x01010101), %o2
-21: retl
- mov 0, %o0
-22: retl
- mov 1, %o0
-23: retl
- mov 2, %o0
-
-20: or %o4, %lo(0x80808080), %o3
-4: sethi %hi(0x01010101), %o4
-5: or %o4, %lo(0x01010101), %o2
-11: ld [%o0], %o5
-12: sub %o5, %o2, %o4
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o5, %o4
-#endif
- andcc %o4, %o3, %g0
- be 11b
- add %o0, 4, %o0
-
- srl %o5, 24, %g5
- andcc %g5, 0xff, %g0
- be 13f
- add %o0, -4, %o4
- srl %o5, 16, %g5
- andcc %g5, 0xff, %g0
- be 13f
- add %o4, 1, %o4
- srl %o5, 8, %g5
- andcc %g5, 0xff, %g0
- be 13f
- add %o4, 1, %o4
- andcc %o5, 0xff, %g0
- bne,a 12b
- ld [%o0], %o5
- add %o4, 1, %o4
-13: retl
- sub %o4, %o1, %o0
+ mov %o0, %o1
+ andn %o0, 0x3, %o0
+
+ ld [%o0], %o5
+ and %o1, 0x3, %g1
+ mov -1, %g5
+
+ sethi %hi(0x01010101), %o2
+ sll %g1, 3, %g1
+
+ or %o2, %lo(0x01010101), %o2
+ srl %g5, %g1, %g2
+
+ orn %o5, %g2, %o5
+ sll %o2, 7, %o3
+10: add %o0, 4, %o0
+
+ andn %o3, %o5, %g1
+ sub %o5, %o2, %g2
+
+ andcc %g1, %g2, %g0
+ be,a 10b
+ ld [%o0], %o5
+
+ srl %o5, 24, %g1
+
+ andcc %g1, 0xff, %g0
+ be 90f
+ sub %o0, 4, %o0
+
+ srl %o5, 16, %g2
+
+ andcc %g2, 0xff, %g0
+ be 90f
+ add %o0, 1, %o0
+
+ srl %o5, 8, %g1
+
+ andcc %g1, 0xff, %g0
+ be 90f
+ add %o0, 1, %o0
+
+ add %o0, 1, %o0
+
+90: retl
+ sub %o0, %o1, %o0
END(strlen)
libc_hidden_builtin_def (strlen)
diff --git a/libc/sysdeps/sparc/sparc32/udiv_qrnnd.S b/libc/sysdeps/sparc/sparc32/udiv_qrnnd.S
deleted file mode 100644
index 4955318a6..000000000
--- a/libc/sysdeps/sparc/sparc32/udiv_qrnnd.S
+++ /dev/null
@@ -1,168 +0,0 @@
-! SPARC __udiv_qrnnd division support, used from longlong.h.
-!
-! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc.
-!
-! This file is part of the GNU MP Library.
-!
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 2.1 of the License, or (at your
-! option) any later version.
-!
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-!
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-!
-! Added PIC support - May/96, Miguel de Icaza
-!
-! INPUT PARAMETERS
-! rem_ptr i0
-! n1 i1
-! n0 i2
-! d i3
-
-#include <sysdep.h>
-#undef ret /* Kludge for glibc */
-
-#ifdef PIC
- .text
-#else
- .section .rodata,#alloc
-#endif
- .align 8
-
- .type two_to_32,@object
- .size two_to_32,8
-two_to_32:
- .double 0r4294967296
-
- .type two_to_31,@object
- .size two_to_31,8
-two_to_31:
- .double 0r2147483648
-
- .text
-ENTRY(__udiv_qrnnd)
- !#PROLOGUE# 0
- save %sp,-104,%sp
- !#PROLOGUE# 1
- st %i1,[%fp-8]
- ld [%fp-8],%f10
-#ifdef PIC
-LOC(base):
- call 1f
- fitod %f10,%f4
-1: ldd [%o7-(LOC(base)-two_to_32)],%f8
-#else
- sethi %hi(two_to_32),%o7
- fitod %f10,%f4
- ldd [%o7+%lo(two_to_32)],%f8
-#endif
- cmp %i1,0
- bge LOC(248)
- mov %i0,%i5
- faddd %f4,%f8,%f4
-LOC(248):
- st %i2,[%fp-8]
- ld [%fp-8],%f10
- fmuld %f4,%f8,%f6
- cmp %i2,0
- bge LOC(249)
- fitod %f10,%f2
- faddd %f2,%f8,%f2
-LOC(249):
- st %i3,[%fp-8]
- faddd %f6,%f2,%f2
- ld [%fp-8],%f10
- cmp %i3,0
- bge LOC(250)
- fitod %f10,%f4
- faddd %f4,%f8,%f4
-LOC(250):
- fdivd %f2,%f4,%f2
-#ifdef PIC
- ldd [%o7-(LOC(base)-two_to_31)],%f4
-#else
- sethi %hi(two_to_31),%o7
- ldd [%o7+%lo(two_to_31)],%f4
-#endif
- fcmped %f2,%f4
- nop
- fbge,a LOC(251)
- fsubd %f2,%f4,%f2
- fdtoi %f2,%f2
- st %f2,[%fp-8]
- b LOC(252)
- ld [%fp-8],%i4
-LOC(251):
- fdtoi %f2,%f2
- st %f2,[%fp-8]
- ld [%fp-8],%i4
- sethi %hi(-2147483648),%g2
- xor %i4,%g2,%i4
-LOC(252):
- wr %g0,%i4,%y
- sra %i3,31,%g2
- and %i4,%g2,%g2
- andcc %g0,0,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,%i3,%g1
- mulscc %g1,0,%g1
- add %g1,%g2,%i0
- rd %y,%g3
- subcc %i2,%g3,%o7
- subxcc %i1,%i0,%g0
- be LOC(253)
- cmp %o7,%i3
-
- add %i4,-1,%i0
- add %o7,%i3,%o7
- st %o7,[%i5]
- ret
- restore
-LOC(253):
- blu LOC(246)
- mov %i4,%i0
- add %i4,1,%i0
- sub %o7,%i3,%o7
-LOC(246):
- st %o7,[%i5]
- ret
- restore
-
-END(__udiv_qrnnd)
diff --git a/libc/sysdeps/sparc/sparc64/Implies b/libc/sysdeps/sparc/sparc64/Implies
index 01bf14e73..7abc50efc 100644
--- a/libc/sysdeps/sparc/sparc64/Implies
+++ b/libc/sysdeps/sparc/sparc64/Implies
@@ -1,6 +1,7 @@
wordsize-64
# SPARC uses IEEE 754 floating point.
ieee754/ldbl-128
+ieee754/dbl-64/wordsize-64
ieee754/dbl-64
ieee754/flt-32
sparc/sparc64/soft-fp
diff --git a/libc/sysdeps/sparc/sparc64/Makefile b/libc/sysdeps/sparc/sparc64/Makefile
index 3bb023883..1a859dffc 100644
--- a/libc/sysdeps/sparc/sparc64/Makefile
+++ b/libc/sysdeps/sparc/sparc64/Makefile
@@ -6,3 +6,7 @@ endif
ifeq ($(subdir),csu)
CFLAGS-initfini.s += -mcpu=v9
endif
+
+ifeq ($(subdir),string)
+sysdep_routines += align-cpy
+endif
diff --git a/libc/sysdeps/sparc/sparc64/align-cpy.S b/libc/sysdeps/sparc/sparc64/align-cpy.S
new file mode 100644
index 000000000..bae788fe4
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/align-cpy.S
@@ -0,0 +1,85 @@
+/* Aligned copy routines specified by Sparc V9 ABI.
+ For 64-bit sparc.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by David S. Miller (davem@davemloft.net)
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .text
+ .align 8
+ENTRY(__align_cpy_8)
+10: cmp %o0, %o1
+ be,pn %xcc, 9f
+ mov %o0, %o3
+ subcc %o2, 0x08, %o2
+ be,pn %xcc, 8f
+1: ldx [%o1 + 0x00], %o5
+ ldx [%o1 + 0x08], %o4
+ subcc %o2, 0x10, %o2
+ add %o1, 0x10, %o1
+ stx %o5, [%o3 + 0x00]
+ stx %o4, [%o3 + 0x08]
+ bg,pt %xcc, 1b
+ add %o3, 0x10, %o3
+ bne,pn %xcc, 9f
+ nop
+ ldx [%o1 + 0x00], %o5
+8: stx %o5, [%o3 + 0x00]
+9: retl
+ nop
+END(__align_cpy_8)
+
+ .align 8
+ENTRY(__align_cpy_4)
+20: cmp %o0, %o1
+ be,pn %xcc, 9f
+ mov %o0, %o3
+ subcc %o2, 0x04, %o2
+ be,pn %xcc, 8f
+1: lduw [%o1 + 0x00], %o5
+ lduw [%o1 + 0x04], %o4
+ subcc %o2, 0x08, %o2
+ add %o1, 0x08, %o1
+ stw %o5, [%o3 + 0x00]
+ stw %o4, [%o3 + 0x04]
+ bg,pt %xcc, 1b
+ add %o3, 0x08, %o3
+ bne,pn %xcc, 9f
+ nop
+ lduw [%o1 + 0x00], %o5
+8: stw %o5, [%o3 + 0x00]
+9: retl
+ nop
+END(__align_cpy_4)
+
+ .align 8
+ENTRY(__align_cpy_2)
+ or %o0, %o1, %o3
+ or %o2, %o3, %o3
+ andcc %o3, 0x7, %g0
+ be,pt %xcc, 10b
+ andcc %o3, 0x3, %g0
+ be,pt %xcc, 20b
+ mov %o7, %g1
+ call HIDDEN_JUMPTARGET(memcpy)
+ mov %o7, %g1
+END(__align_cpy_2)
+
+weak_alias (__align_cpy_8, __align_cpy_16)
+weak_alias (__align_cpy_2, __align_cpy_1)
diff --git a/libc/sysdeps/sparc/sparc64/bcopy.c b/libc/sysdeps/sparc/sparc64/bcopy.c
deleted file mode 100644
index 9a455f33c..000000000
--- a/libc/sysdeps/sparc/sparc64/bcopy.c
+++ /dev/null
@@ -1 +0,0 @@
-/* bcopy is in memcpy.S */
diff --git a/libc/sysdeps/sparc/sparc64/dl-irel.h b/libc/sysdeps/sparc/sparc64/dl-irel.h
new file mode 100644
index 000000000..1a2a0a3dd
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/dl-irel.h
@@ -0,0 +1,58 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+ SPARC 64-bit version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <dl-plt.h>
+
+#define ELF_MACHINE_IRELA 1
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const Elf64_Rela *reloc)
+{
+ unsigned int r_type = (reloc->r_info & 0xff);
+
+ if (__builtin_expect (r_type == R_SPARC_IRELATIVE, 1))
+ {
+ Elf64_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf64_Addr value = ((Elf64_Addr (*) (void)) reloc->r_addend) ();
+ *reloc_addr = value;
+ }
+ else if (__builtin_expect (r_type == R_SPARC_JMP_IREL, 1))
+ {
+ Elf64_Addr *const reloc_addr = (void *) reloc->r_offset;
+ Elf64_Addr value = ((Elf64_Addr (*) (void)) reloc->r_addend) ();
+ struct link_map map = { .l_addr = 0 };
+
+ /* 'high' is always zero, for large PLT entries the linker
+ emits an R_SPARC_IRELATIVE. */
+ sparc64_fixup_plt (&map, reloc, reloc_addr, value, 0, 0);
+ }
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ __libc_fatal ("unexpected reloc type in static binary");
+}
+
+#endif /* dl-irel.h */
diff --git a/libc/sysdeps/sparc/sparc64/dl-machine.h b/libc/sysdeps/sparc/sparc64/dl-machine.h
index 3e749a74d..82ab5a454 100644
--- a/libc/sysdeps/sparc/sparc64/dl-machine.h
+++ b/libc/sysdeps/sparc/sparc64/dl-machine.h
@@ -1,6 +1,6 @@
/* Machine-dependent ELF dynamic relocation inline functions. Sparc64 version.
- Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -27,6 +27,7 @@
#include <sys/param.h>
#include <ldsodefs.h>
#include <sysdep.h>
+#include <dl-plt.h>
#ifndef VALIDX
# define VALIDX(tag) (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM \
@@ -89,132 +90,6 @@ elf_machine_load_address (void)
return (Elf64_Addr) got - *got + (Elf32_Sword) ((pc[2] - pc[3]) * 4) - 4;
}
-/* We have 4 cases to handle. And we code different code sequences
- for each one. I love V9 code models... */
-static inline void __attribute__ ((always_inline))
-sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
- Elf64_Addr *reloc_addr, Elf64_Addr value,
- Elf64_Addr high, int t)
-{
- unsigned int *insns = (unsigned int *) reloc_addr;
- Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr;
- Elf64_Sxword disp = value - plt_vaddr;
-
- /* Now move plt_vaddr up to the call instruction. */
- plt_vaddr += ((t + 1) * 4);
-
- /* PLT entries .PLT32768 and above look always the same. */
- if (__builtin_expect (high, 0) != 0)
- {
- *reloc_addr = value - map->l_addr;
- }
- /* Near destination. */
- else if (disp >= -0x800000 && disp < 0x800000)
- {
- /* As this is just one instruction, it is thread safe and so
- we can avoid the unnecessary sethi FOO, %g1.
- b,a target */
- insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff);
- __asm __volatile ("flush %0" : : "r" (insns));
- }
- /* 32-bit Sparc style, the target is in the lower 32-bits of
- address space. */
- else if (insns += t, (value >> 32) == 0)
- {
- /* sethi %hi(target), %g1
- jmpl %g1 + %lo(target), %g0 */
-
- insns[1] = 0x81c06000 | (value & 0x3ff);
- __asm __volatile ("flush %0 + 4" : : "r" (insns));
-
- insns[0] = 0x03000000 | ((unsigned int)(value >> 10));
- __asm __volatile ("flush %0" : : "r" (insns));
- }
- /* We can also get somewhat simple sequences if the distance between
- the target and the PLT entry is within +/- 2GB. */
- else if ((plt_vaddr > value
- && ((plt_vaddr - value) >> 31) == 0)
- || (value > plt_vaddr
- && ((value - plt_vaddr) >> 31) == 0))
- {
- unsigned int displacement;
-
- if (plt_vaddr > value)
- displacement = (0 - (plt_vaddr - value));
- else
- displacement = value - plt_vaddr;
-
- /* mov %o7, %g1
- call displacement
- mov %g1, %o7 */
-
- insns[2] = 0x9e100001;
- __asm __volatile ("flush %0 + 8" : : "r" (insns));
-
- insns[1] = 0x40000000 | (displacement >> 2);
- __asm __volatile ("flush %0 + 4" : : "r" (insns));
-
- insns[0] = 0x8210000f;
- __asm __volatile ("flush %0" : : "r" (insns));
- }
- /* Worst case, ho hum... */
- else
- {
- unsigned int high32 = (value >> 32);
- unsigned int low32 = (unsigned int) value;
-
- /* ??? Some tricks can be stolen from the sparc64 egcs backend
- constant formation code I wrote. -DaveM */
-
- if (__builtin_expect (high32 & 0x3ff, 0))
- {
- /* sethi %hh(value), %g1
- sethi %lm(value), %g5
- or %g1, %hm(value), %g1
- or %g5, %lo(value), %g5
- sllx %g1, 32, %g1
- jmpl %g1 + %g5, %g0
- nop */
-
- insns[5] = 0x81c04005;
- __asm __volatile ("flush %0 + 20" : : "r" (insns));
-
- insns[4] = 0x83287020;
- __asm __volatile ("flush %0 + 16" : : "r" (insns));
-
- insns[3] = 0x8a116000 | (low32 & 0x3ff);
- __asm __volatile ("flush %0 + 12" : : "r" (insns));
-
- insns[2] = 0x82106000 | (high32 & 0x3ff);
- }
- else
- {
- /* sethi %hh(value), %g1
- sethi %lm(value), %g5
- sllx %g1, 32, %g1
- or %g5, %lo(value), %g5
- jmpl %g1 + %g5, %g0
- nop */
-
- insns[4] = 0x81c04005;
- __asm __volatile ("flush %0 + 16" : : "r" (insns));
-
- insns[3] = 0x8a116000 | (low32 & 0x3ff);
- __asm __volatile ("flush %0 + 12" : : "r" (insns));
-
- insns[2] = 0x83287020;
- }
-
- __asm __volatile ("flush %0 + 8" : : "r" (insns));
-
- insns[1] = 0x0b000000 | (low32 >> 10);
- __asm __volatile ("flush %0 + 4" : : "r" (insns));
-
- insns[0] = 0x03000000 | (high32 >> 10);
- __asm __volatile ("flush %0" : : "r" (insns));
- }
-}
-
static inline Elf64_Addr __attribute__ ((always_inline))
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
const Elf64_Rela *reloc,
@@ -552,6 +427,11 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
value += reloc->r_addend; /* Assume copy relocs have zero addend. */
+ if (sym != NULL
+ && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)
+ && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1))
+ value = ((Elf64_Addr (*) (void)) value) ();
+
switch (r_type)
{
#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP
@@ -579,6 +459,13 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
case R_SPARC_GLOB_DAT:
*reloc_addr = value;
break;
+ case R_SPARC_IRELATIVE:
+ value = ((Elf64_Addr (*) (void)) value) ();
+ *reloc_addr = value;
+ break;
+ case R_SPARC_JMP_IREL:
+ value = ((Elf64_Addr (*) (void)) value) ();
+ /* Fall thru */
case R_SPARC_JMP_SLOT:
#ifdef RESOLVE_CONFLICT_FIND_MAP
/* R_SPARC_JMP_SLOT conflicts against .plt[32768+]
@@ -626,11 +513,13 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
value = sym->st_value - sym_map->l_tls_offset
+ reloc->r_addend;
if (r_type == R_SPARC_TLS_LE_HIX22)
- *reloc_addr = (*reloc_addr & 0xffc00000)
- | (((~value) >> 10) & 0x3fffff);
+ *(unsigned int *)reloc_addr =
+ ((*(unsigned int *)reloc_addr & 0xffc00000)
+ | (((~value) >> 10) & 0x3fffff));
else
- *reloc_addr = (*reloc_addr & 0xffffe000) | (value & 0x3ff)
- | 0x1c00;
+ *(unsigned int *)reloc_addr =
+ ((*(unsigned int *)reloc_addr & 0xffffe000) | (value & 0x3ff)
+ | 0x1c00);
}
break;
# endif
@@ -760,16 +649,29 @@ __attribute__ ((always_inline))
elf_machine_lazy_rel (struct link_map *map,
Elf64_Addr l_addr, const Elf64_Rela *reloc)
{
- switch (ELF64_R_TYPE (reloc->r_info))
+ Elf64_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+ const unsigned int r_type = ELF64_R_TYPE (reloc->r_info);
+
+ if (__builtin_expect (r_type == R_SPARC_JMP_SLOT, 1))
+ ;
+ else if (r_type == R_SPARC_JMP_IREL
+ || r_type == R_SPARC_IRELATIVE)
{
- case R_SPARC_NONE:
- break;
- case R_SPARC_JMP_SLOT:
- break;
- default:
- _dl_reloc_bad_type (map, ELFW(R_TYPE) (reloc->r_info), 1);
- break;
+ Elf64_Addr value = map->l_addr + reloc->r_addend;
+ value = ((Elf64_Addr (*) (void)) value) ();
+ if (r_type == R_SPARC_JMP_IREL)
+ {
+ /* 'high' is always zero, for large PLT entries the linker
+ emits an R_SPARC_IRELATIVE. */
+ sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1);
+ }
+ else
+ *reloc_addr = value;
}
+ else if (r_type == R_SPARC_NONE)
+ ;
+ else
+ _dl_reloc_bad_type (map, r_type, 1);
}
#endif /* RESOLVE_MAP */
diff --git a/libc/sysdeps/sparc/sparc64/dl-plt.h b/libc/sysdeps/sparc/sparc64/dl-plt.h
new file mode 100644
index 000000000..ca2fe3bbd
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/dl-plt.h
@@ -0,0 +1,163 @@
+/* PLT fixups. Sparc 64-bit version.
+ Copyright (C) 1997-2006, 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* We have 4 cases to handle. And we code different code sequences
+ for each one. I love V9 code models... */
+static inline void __attribute__ ((always_inline))
+sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
+ Elf64_Addr *reloc_addr, Elf64_Addr value,
+ Elf64_Addr high, int t)
+{
+ unsigned int *insns = (unsigned int *) reloc_addr;
+ Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr;
+ Elf64_Sxword disp = value - plt_vaddr;
+
+ /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap,
+ in which case we'll be resolving all PLT entries and thus can
+ optimize by overwriting instructions starting at the first PLT entry
+ instruction and we need not be mindful of thread safety.
+
+ Otherwise, 't' is '1'.
+
+ Now move plt_vaddr up to the call instruction. */
+ plt_vaddr += ((t + 1) * 4);
+
+ /* PLT entries .PLT32768 and above look always the same. */
+ if (__builtin_expect (high, 0) != 0)
+ {
+ *reloc_addr = value - map->l_addr;
+ }
+ /* Near destination. */
+ else if (disp >= -0x800000 && disp < 0x800000)
+ {
+ unsigned int insn;
+
+ /* ba,a */
+ insn = 0x30800000 | ((disp >> 2) & 0x3fffff);
+
+ if (disp >= -0x100000 && disp < 0x100000)
+ {
+ /* ba,a,pt %icc */
+ insn = 0x30480000 | ((disp >> 2) & 0x07ffff);
+ }
+
+ /* As this is just one instruction, it is thread safe and so we
+ can avoid the unnecessary sethi FOO, %g1. Each 64-bit PLT
+ entry is 8 instructions long, so we can't run into the 'jmp'
+ delay slot problems 32-bit PLTs can. */
+ insns[0] = insn;
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+ /* 32-bit Sparc style, the target is in the lower 32-bits of
+ address space. */
+ else if (insns += t, (value >> 32) == 0)
+ {
+ /* sethi %hi(target), %g1
+ jmpl %g1 + %lo(target), %g0 */
+
+ insns[1] = 0x81c06000 | (value & 0x3ff);
+ __asm __volatile ("flush %0 + 4" : : "r" (insns));
+
+ insns[0] = 0x03000000 | ((unsigned int)(value >> 10));
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+ /* We can also get somewhat simple sequences if the distance between
+ the target and the PLT entry is within +/- 2GB. */
+ else if ((plt_vaddr > value
+ && ((plt_vaddr - value) >> 31) == 0)
+ || (value > plt_vaddr
+ && ((value - plt_vaddr) >> 31) == 0))
+ {
+ unsigned int displacement;
+
+ if (plt_vaddr > value)
+ displacement = (0 - (plt_vaddr - value));
+ else
+ displacement = value - plt_vaddr;
+
+ /* mov %o7, %g1
+ call displacement
+ mov %g1, %o7 */
+
+ insns[2] = 0x9e100001;
+ __asm __volatile ("flush %0 + 8" : : "r" (insns));
+
+ insns[1] = 0x40000000 | (displacement >> 2);
+ __asm __volatile ("flush %0 + 4" : : "r" (insns));
+
+ insns[0] = 0x8210000f;
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+ /* Worst case, ho hum... */
+ else
+ {
+ unsigned int high32 = (value >> 32);
+ unsigned int low32 = (unsigned int) value;
+
+ /* ??? Some tricks can be stolen from the sparc64 egcs backend
+ constant formation code I wrote. -DaveM */
+
+ if (__builtin_expect (high32 & 0x3ff, 0))
+ {
+ /* sethi %hh(value), %g1
+ sethi %lm(value), %g5
+ or %g1, %hm(value), %g1
+ or %g5, %lo(value), %g5
+ sllx %g1, 32, %g1
+ jmpl %g1 + %g5, %g0
+ nop */
+
+ insns[5] = 0x81c04005;
+ __asm __volatile ("flush %0 + 20" : : "r" (insns));
+
+ insns[4] = 0x83287020;
+ __asm __volatile ("flush %0 + 16" : : "r" (insns));
+
+ insns[3] = 0x8a116000 | (low32 & 0x3ff);
+ __asm __volatile ("flush %0 + 12" : : "r" (insns));
+
+ insns[2] = 0x82106000 | (high32 & 0x3ff);
+ }
+ else
+ {
+ /* sethi %hh(value), %g1
+ sethi %lm(value), %g5
+ sllx %g1, 32, %g1
+ or %g5, %lo(value), %g5
+ jmpl %g1 + %g5, %g0
+ nop */
+
+ insns[4] = 0x81c04005;
+ __asm __volatile ("flush %0 + 16" : : "r" (insns));
+
+ insns[3] = 0x8a116000 | (low32 & 0x3ff);
+ __asm __volatile ("flush %0 + 12" : : "r" (insns));
+
+ insns[2] = 0x83287020;
+ }
+
+ __asm __volatile ("flush %0 + 8" : : "r" (insns));
+
+ insns[1] = 0x0b000000 | (low32 >> 10);
+ __asm __volatile ("flush %0 + 4" : : "r" (insns));
+
+ insns[0] = 0x03000000 | (high32 >> 10);
+ __asm __volatile ("flush %0" : : "r" (insns));
+ }
+}
diff --git a/libc/sysdeps/sparc/sparc64/memchr.S b/libc/sysdeps/sparc/sparc64/memchr.S
index 4f13cb570..ab1666bfb 100644
--- a/libc/sysdeps/sparc/sparc64/memchr.S
+++ b/libc/sysdeps/sparc/sparc64/memchr.S
@@ -96,7 +96,7 @@ ENTRY(__memchr)
* %o4 = value XOR c */
2: cmp %o0, %o2 /* IEU1 Group */
- bg,pn %XCC, 11f /* CTI */
+ bgu,pn %XCC, 11f /* CTI */
ldxa [%o0] ASI_PNF, %o3 /* Load */
sub %o4, %g1, %o5 /* IEU0 Group */
add %o0, 8, %o0 /* IEU1 */
diff --git a/libc/sysdeps/sparc/sparc64/memcopy.h b/libc/sysdeps/sparc/sparc64/memcopy.h
new file mode 100644
index 000000000..ec978e3c8
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/memcopy.h
@@ -0,0 +1 @@
+#include <sparc32/memcopy.h>
diff --git a/libc/sysdeps/sparc/sparc64/memcpy.S b/libc/sysdeps/sparc/sparc64/memcpy.S
index 599335801..71e710065 100644
--- a/libc/sysdeps/sparc/sparc64/memcpy.S
+++ b/libc/sysdeps/sparc/sparc64/memcpy.S
@@ -136,79 +136,8 @@
stx %t0, [%dst - offset - 0x10]; \
stx %t1, [%dst - offset - 0x08];
- /* Macros for non-VIS memmove code. */
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stw %t0, [%dst - offset - 0x1c]; \
- srlx %t0, 32, %t0; \
- stw %t0, [%dst - offset - 0x20]; \
- stw %t1, [%dst - offset - 0x14]; \
- srlx %t1, 32, %t1; \
- stw %t1, [%dst - offset - 0x18]; \
- stw %t2, [%dst - offset - 0x0c]; \
- srlx %t2, 32, %t2; \
- stw %t2, [%dst - offset - 0x10]; \
- stw %t3, [%dst - offset - 0x04]; \
- srlx %t3, 32, %t3; \
- stw %t3, [%dst - offset - 0x08];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stx %t0, [%dst - offset - 0x20]; \
- stx %t1, [%dst - offset - 0x18]; \
- stx %t2, [%dst - offset - 0x10]; \
- stx %t3, [%dst - offset - 0x08]; \
- ldx [%src - offset - 0x40], %t0; \
- ldx [%src - offset - 0x38], %t1; \
- ldx [%src - offset - 0x30], %t2; \
- ldx [%src - offset - 0x28], %t3; \
- stx %t0, [%dst - offset - 0x40]; \
- stx %t1, [%dst - offset - 0x38]; \
- stx %t2, [%dst - offset - 0x30]; \
- stx %t3, [%dst - offset - 0x28];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stw %t0, [%dst + offset + 0x04]; \
- srlx %t0, 32, %t2; \
- stw %t2, [%dst + offset + 0x00]; \
- stw %t1, [%dst + offset + 0x0c]; \
- srlx %t1, 32, %t3; \
- stw %t3, [%dst + offset + 0x08];
-
-#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stx %t0, [%dst + offset + 0x00]; \
- stx %t1, [%dst + offset + 0x08];
-
.text
.align 32
-
-ENTRY(bcopy)
- sub %o1, %o0, %o4 /* IEU0 Group */
- mov %o0, %g3 /* IEU1 */
- cmp %o4, %o2 /* IEU1 Group */
- mov %o1, %o0 /* IEU0 */
- bgeu,pt %XCC, 210f /* CTI */
- mov %g3, %o1 /* IEU0 Group */
-#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 */
-#endif
- brnz,pn %o2, 220f /* CTI Group */
- add %o0, %o2, %o0 /* IEU0 */
- retl
- nop
-END(bcopy)
-
- .align 32
ENTRY(__memcpy_large)
200: be,pt %xcc, 201f /* CTI */
andcc %o0, 0x38, %g5 /* IEU1 Group */
@@ -446,65 +375,6 @@ ENTRY(__memcpy_large)
mov %g4, %o0
END(__memcpy_large)
-#ifdef USE_BPR
-
- /* void *__align_cpy_4(void *dest, void *src, size_t n)
- * SPARC v9 SYSV ABI
- * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3))
- */
-
- .align 32
-ENTRY(__align_cpy_4)
- mov %o0, %g4 /* IEU0 Group */
- cmp %o2, 15 /* IEU1 */
- bleu,pn %xcc, 208b /* CTI */
- cmp %o2, (64 * 6) /* IEU1 Group */
- bgeu,pn %xcc, 200b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
- ba,pt %xcc, 216f /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
-END(__align_cpy_4)
-
- /* void *__align_cpy_8(void *dest, void *src, size_t n)
- * SPARC v9 SYSV ABI
- * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7))
- */
-
- .align 32
-ENTRY(__align_cpy_8)
- mov %o0, %g4 /* IEU0 Group */
- cmp %o2, 15 /* IEU1 */
- bleu,pn %xcc, 208b /* CTI */
- cmp %o2, (64 * 6) /* IEU1 Group */
- bgeu,pn %xcc, 201b /* CTI */
- andcc %o0, 0x38, %g5 /* IEU1 Group */
- andcc %o2, -128, %g6 /* IEU1 Group */
- bne,a,pt %xcc, 82f + 4 /* CTI */
- ldx [%o1], %g1 /* Load */
- ba,pt %xcc, 41f /* CTI Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
-END(__align_cpy_8)
-
- /* void *__align_cpy_16(void *dest, void *src, size_t n)
- * SPARC v9 SYSV ABI
- * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15))
- */
-
- .align 32
-ENTRY(__align_cpy_16)
- mov %o0, %g4 /* IEU0 Group */
- cmp %o2, (64 * 6) /* IEU1 */
- bgeu,pn %xcc, 201b /* CTI */
- andcc %o0, 0x38, %g5 /* IEU1 Group */
- andcc %o2, -128, %g6 /* IEU1 Group */
- bne,a,pt %xcc, 82f + 4 /* CTI */
- ldx [%o1], %g1 /* Load */
- ba,pt %xcc, 41f /* CTI Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
-END(__align_cpy_16)
-
-#endif
-
.align 32
ENTRY(memcpy)
210:
@@ -699,227 +569,4 @@ ENTRY(memcpy)
mov %g4, %o0
END(memcpy)
- .align 32
-ENTRY(__memmove_slowpath)
-228: andcc %o2, 1, %g0 /* IEU1 Group */
- be,pt %icc, 2f+4 /* CTI */
-1: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 Group */
- be,pn %xcc, 229f /* CTI */
- stb %o5, [%o0] /* Store */
-2: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o0, 2, %o0 /* IEU0 */
- ldub [%o1 - 2], %g5 /* LOAD Group */
- sub %o1, 2, %o1 /* IEU0 */
- subcc %o2, 2, %o2 /* IEU1 Group */
- stb %o5, [%o0 + 1] /* Store */
- bne,pt %xcc, 2b /* CTI */
- stb %g5, [%o0] /* Store */
-229: retl
- mov %g4, %o0
-219: retl
- nop
-END(__memmove_slowpath)
-
- .align 32
-ENTRY(memmove)
-#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 Group */
-#endif
- brz,pn %o2, 219b /* CTI Group */
- sub %o0, %o1, %o4 /* IEU0 */
- cmp %o4, %o2 /* IEU1 Group */
- bgeu,pt %XCC, 218b /* CTI */
- mov %o0, %g4 /* IEU0 */
- add %o0, %o2, %o0 /* IEU0 Group */
-220: add %o1, %o2, %o1 /* IEU1 */
- cmp %o2, 15 /* IEU1 Group */
- bleu,pn %xcc, 228b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
- sub %o0, %o1, %g5 /* IEU0 */
- andcc %g5, 3, %o5 /* IEU1 Group */
- bne,pn %xcc, 232f /* CTI */
- andcc %o1, 3, %g0 /* IEU1 Group */
- be,a,pt %xcc, 236f /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
- andcc %o1, 1, %g0 /* IEU1 Group */
- be,pn %xcc, 4f /* CTI */
- andcc %o1, 2, %g0 /* IEU1 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- sub %o2, 1, %o2 /* IEU0 Group */
- be,pn %xcc, 5f /* CTI Group */
- stb %g2, [%o0] /* Store */
-4: lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sub %o0, 2, %o0 /* IEU1 */
- sub %o2, 2, %o2 /* IEU0 */
- sth %g2, [%o0] /* Store Group + bubble */
-5: andcc %o1, 4, %g0 /* IEU1 */
-236: be,a,pn %xcc, 2f /* CTI */
- andcc %o2, -128, %g6 /* IEU1 Group */
- lduw [%o1 - 4], %g5 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- sub %o0, 4, %o0 /* IEU1 */
- sub %o2, 4, %o2 /* IEU0 Group */
- stw %g5, [%o0] /* Store */
- andcc %o2, -128, %g6 /* IEU1 Group */
-2: be,pn %xcc, 235f /* CTI */
- andcc %o0, 4, %g0 /* IEU1 Group */
- be,pn %xcc, 282f + 4 /* CTI Group */
-5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 5b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
-235: andcc %o2, 0x70, %g6 /* IEU1 Group */
-41: be,pn %xcc, 280f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-279: rd %pc, %o5 /* PDU Group */
- sll %g6, 1, %g5 /* IEU0 Group */
- sub %o1, %g6, %o1 /* IEU1 */
- sub %o5, %g5, %o5 /* IEU0 Group */
- jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
-280: be,pt %xcc, 281f /* CTI */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- stw %g2, [%o0 + 4] /* Store Group */
- sub %o1, 8, %o1 /* IEU1 */
- srlx %g2, 32, %g2 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-281: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- stw %g2, [%o0 - 4] /* Store Group */
- sub %o0, 4, %o0 /* IEU0 */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sth %g2, [%o0 - 2] /* Store Group */
- sub %o0, 2, %o0 /* IEU0 */
-1: be,pt %xcc, 211f /* CTI */
- nop /* IEU1 */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-211: retl
- mov %g4, %o0
-
-282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 282b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
- be,pn %xcc, 284f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-283: rd %pc, %o5 /* PDU Group */
- sub %o1, %g6, %o1 /* IEU0 Group */
- sub %o5, %g6, %o5 /* IEU1 */
- jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
-284: be,pt %xcc, 285f /* CTI Group */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- sub %o1, 8, %o1 /* IEU0 Group */
- stx %g2, [%o0] /* Store */
-285: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o0, 4, %o0 /* IEU0 */
- sub %o1, 4, %o1 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o0, 2, %o0 /* IEU0 */
- sub %o1, 2, %o1 /* IEU0 Group */
- sth %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- nop /* IEU0 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-1: retl
- mov %g4, %o0
-
-232: brz,pt %g2, 2f /* CTI Group */
- sub %o2, %g2, %o2 /* IEU0 Group */
-1: ldub [%o1 - 1], %g5 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %g2, 1, %g2 /* IEU1 Group */
- bne,pt %xcc, 1b /* CTI */
- stb %g5, [%o0] /* Store */
-2: andn %o2, 7, %g5 /* IEU0 Group */
- and %o2, 7, %o2 /* IEU1 */
- fmovd %f0, %f2 /* FPU */
- alignaddr %o1, %g0, %g1 /* GRU Group */
- ldd [%g1], %f4 /* Load Group */
-1: ldd [%g1 - 8], %f6 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 Group */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f6, %f4, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 Group */
- be,pn %xcc, 233f /* CTI */
- sub %o0, 8, %o0 /* IEU1 */
- ldd [%g1 - 8], %f4 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f4, %f6, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 */
- bne,pn %xcc, 1b /* CTI Group */
- sub %o0, 8, %o0 /* IEU0 */
-233: brz,pn %o2, 234f /* CTI Group */
- nop /* IEU0 */
-237: ldub [%o1 - 1], %g5 /* LOAD */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 */
- bne,pt %xcc, 237b /* CTI */
- stb %g5, [%o0] /* Store Group */
-234: wr %g0, FPRS_FEF, %fprs
- retl
- mov %g4, %o0
-END(memmove)
-
-#ifdef USE_BPR
-weak_alias (memcpy, __align_cpy_1)
-weak_alias (memcpy, __align_cpy_2)
-#endif
libc_hidden_builtin_def (memcpy)
-libc_hidden_builtin_def (memmove)
diff --git a/libc/sysdeps/sparc/sparc64/memmove.c b/libc/sysdeps/sparc/sparc64/memmove.c
deleted file mode 100644
index a8d2d4994..000000000
--- a/libc/sysdeps/sparc/sparc64/memmove.c
+++ /dev/null
@@ -1 +0,0 @@
-/* memmove is in memcpy.S */
diff --git a/libc/sysdeps/sparc/sparc64/multiarch/Makefile b/libc/sysdeps/sparc/sparc64/multiarch/Makefile
new file mode 100644
index 000000000..4d45042a9
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/multiarch/Makefile
@@ -0,0 +1,4 @@
+ifeq ($(subdir),string)
+sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
+ memset-niagara1
+endif
diff --git a/libc/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/libc/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
index ad2b0f742..6a78295e8 100644
--- a/libc/sysdeps/sparc/sparc64/sparcv9v/memcpy.S
+++ b/libc/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
@@ -36,34 +36,19 @@
#define XCC xcc
#endif
+#if !defined NOT_IN_libc
+
.register %g2,#scratch
.register %g3,#scratch
.register %g6,#scratch
.text
- .align 32
-
-ENTRY(bcopy)
- sub %o1, %o0, %o4
- mov %o0, %g4
- cmp %o4, %o2
- mov %o1, %o0
- bgeu,pt %XCC, 100f
- mov %g4, %o1
-#ifndef USE_BPR
- srl %o2, 0, %o2
-#endif
- brnz,pn %o2, 220f
- add %o0, %o2, %o0
- retl
- nop
-END(bcopy)
.align 32
-ENTRY(memcpy)
-#ifndef USE_BPR
+ENTRY(__memcpy_niagara1)
+# ifndef USE_BPR
srl %o2, 0, %o2
-#endif
+# endif
100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
cmp %o2, 0
@@ -352,245 +337,6 @@ ENTRY(memcpy)
retl
mov %g5, %o0
-END(memcpy)
-
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stw %t0, [%dst - offset - 0x1c]; \
- srlx %t0, 32, %t0; \
- stw %t0, [%dst - offset - 0x20]; \
- stw %t1, [%dst - offset - 0x14]; \
- srlx %t1, 32, %t1; \
- stw %t1, [%dst - offset - 0x18]; \
- stw %t2, [%dst - offset - 0x0c]; \
- srlx %t2, 32, %t2; \
- stw %t2, [%dst - offset - 0x10]; \
- stw %t3, [%dst - offset - 0x04]; \
- srlx %t3, 32, %t3; \
- stw %t3, [%dst - offset - 0x08];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stx %t0, [%dst - offset - 0x20]; \
- stx %t1, [%dst - offset - 0x18]; \
- stx %t2, [%dst - offset - 0x10]; \
- stx %t3, [%dst - offset - 0x08]; \
- ldx [%src - offset - 0x40], %t0; \
- ldx [%src - offset - 0x38], %t1; \
- ldx [%src - offset - 0x30], %t2; \
- ldx [%src - offset - 0x28], %t3; \
- stx %t0, [%dst - offset - 0x40]; \
- stx %t1, [%dst - offset - 0x38]; \
- stx %t2, [%dst - offset - 0x30]; \
- stx %t3, [%dst - offset - 0x28];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stw %t0, [%dst + offset + 0x04]; \
- srlx %t0, 32, %t2; \
- stw %t2, [%dst + offset + 0x00]; \
- stw %t1, [%dst + offset + 0x0c]; \
- srlx %t1, 32, %t3; \
- stw %t3, [%dst + offset + 0x08];
-
-#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stx %t0, [%dst + offset + 0x00]; \
- stx %t1, [%dst + offset + 0x08];
+END(__memcpy_niagara1)
- .align 32
-228: andcc %o2, 1, %g0
- be,pt %icc, 2f+4
-1: ldub [%o1 - 1], %o5
- sub %o1, 1, %o1
- sub %o0, 1, %o0
- subcc %o2, 1, %o2
- be,pn %xcc, 229f
- stb %o5, [%o0]
-2: ldub [%o1 - 1], %o5
- sub %o0, 2, %o0
- ldub [%o1 - 2], %g5
- sub %o1, 2, %o1
- subcc %o2, 2, %o2
- stb %o5, [%o0 + 1]
- bne,pt %xcc, 2b
- stb %g5, [%o0]
-229: retl
- mov %g4, %o0
-out: retl
- mov %g5, %o0
-
- .align 32
-ENTRY(memmove)
- mov %o0, %g5
-#ifndef USE_BPR
- srl %o2, 0, %o2
-#endif
- brz,pn %o2, out
- sub %o0, %o1, %o4
- cmp %o4, %o2
- bgeu,pt %XCC, 218b
- mov %o0, %g4
- add %o0, %o2, %o0
-220: add %o1, %o2, %o1
- cmp %o2, 15
- bleu,pn %xcc, 228b
- andcc %o0, 7, %g2
- sub %o0, %o1, %g5
- andcc %g5, 3, %o5
- bne,pn %xcc, 232f
- andcc %o1, 3, %g0
- be,a,pt %xcc, 236f
- andcc %o1, 4, %g0
- andcc %o1, 1, %g0
- be,pn %xcc, 4f
- andcc %o1, 2, %g0
- ldub [%o1 - 1], %g2
- sub %o1, 1, %o1
- sub %o0, 1, %o0
- sub %o2, 1, %o2
- be,pn %xcc, 5f
- stb %g2, [%o0]
-4: lduh [%o1 - 2], %g2
- sub %o1, 2, %o1
- sub %o0, 2, %o0
- sub %o2, 2, %o2
- sth %g2, [%o0]
-5: andcc %o1, 4, %g0
-236: be,a,pn %xcc, 2f
- andcc %o2, -128, %g6
- lduw [%o1 - 4], %g5
- sub %o1, 4, %o1
- sub %o0, 4, %o0
- sub %o2, 4, %o2
- stw %g5, [%o0]
- andcc %o2, -128, %g6
-2: be,pn %xcc, 235f
- andcc %o0, 4, %g0
- be,pn %xcc, 282f + 4
-5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
- subcc %g6, 128, %g6
- sub %o1, 128, %o1
- bne,pt %xcc, 5b
- sub %o0, 128, %o0
-235: andcc %o2, 0x70, %g6
-41: be,pn %xcc, 280f
- andcc %o2, 8, %g0
-
-279: rd %pc, %o5
- sll %g6, 1, %g5
- sub %o1, %g6, %o1
- sub %o5, %g5, %o5
- jmpl %o5 + %lo(280f - 279b), %g0
- sub %o0, %g6, %o0
- RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
-280: be,pt %xcc, 281f
- andcc %o2, 4, %g0
- ldx [%o1 - 8], %g2
- sub %o0, 8, %o0
- stw %g2, [%o0 + 4]
- sub %o1, 8, %o1
- srlx %g2, 32, %g2
- stw %g2, [%o0]
-281: be,pt %xcc, 1f
- andcc %o2, 2, %g0
- lduw [%o1 - 4], %g2
- sub %o1, 4, %o1
- stw %g2, [%o0 - 4]
- sub %o0, 4, %o0
-1: be,pt %xcc, 1f
- andcc %o2, 1, %g0
- lduh [%o1 - 2], %g2
- sub %o1, 2, %o1
- sth %g2, [%o0 - 2]
- sub %o0, 2, %o0
-1: be,pt %xcc, 211f
- nop
- ldub [%o1 - 1], %g2
- stb %g2, [%o0 - 1]
-211: retl
- mov %g4, %o0
-
-282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- subcc %g6, 128, %g6
- sub %o1, 128, %o1
- bne,pt %xcc, 282b
- sub %o0, 128, %o0
- andcc %o2, 0x70, %g6
- be,pn %xcc, 284f
- andcc %o2, 8, %g0
-
-283: rd %pc, %o5
- sub %o1, %g6, %o1
- sub %o5, %g6, %o5
- jmpl %o5 + %lo(284f - 283b), %g0
- sub %o0, %g6, %o0
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
-284: be,pt %xcc, 285f
- andcc %o2, 4, %g0
- ldx [%o1 - 8], %g2
- sub %o0, 8, %o0
- sub %o1, 8, %o1
- stx %g2, [%o0]
-285: be,pt %xcc, 1f
- andcc %o2, 2, %g0
- lduw [%o1 - 4], %g2
- sub %o0, 4, %o0
- sub %o1, 4, %o1
- stw %g2, [%o0]
-1: be,pt %xcc, 1f
- andcc %o2, 1, %g0
- lduh [%o1 - 2], %g2
- sub %o0, 2, %o0
- sub %o1, 2, %o1
- sth %g2, [%o0]
-1: be,pt %xcc, 1f
- nop
- ldub [%o1 - 1], %g2
- stb %g2, [%o0 - 1]
-1: retl
- mov %g4, %o0
-
-232: ldub [%o1 - 1], %g5
- sub %o1, 1, %o1
- sub %o0, 1, %o0
- subcc %o2, 1, %o2
- bne,pt %xcc, 232b
- stb %g5, [%o0]
-234: retl
- mov %g4, %o0
-END(memmove)
-
-#ifdef USE_BPR
-weak_alias (memcpy, __align_cpy_1)
-weak_alias (memcpy, __align_cpy_2)
-weak_alias (memcpy, __align_cpy_4)
-weak_alias (memcpy, __align_cpy_8)
-weak_alias (memcpy, __align_cpy_16)
#endif
-libc_hidden_builtin_def (memcpy)
-libc_hidden_builtin_def (memmove)
diff --git a/libc/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S b/libc/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
index b261f461a..35f6989ac 100644
--- a/libc/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S
+++ b/libc/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
@@ -138,34 +138,19 @@
LOAD(ldd, base + 0x28, %x5); \
LOAD(ldd, base + 0x30, %x6);
+#if !defined NOT_IN_libc
+
.register %g2,#scratch
.register %g3,#scratch
.register %g6,#scratch
.text
- .align 32
-
-ENTRY(bcopy)
- sub %o1, %o0, %o4
- mov %o0, %g4
- cmp %o4, %o2
- mov %o1, %o0
- bgeu,pt %XCC, 100f
- mov %g4, %o1
-#ifndef USE_BPR
- srl %o2, 0, %o2
-#endif
- brnz,pn %o2, 220f
- add %o0, %o2, %o0
- retl
- nop
-END(bcopy)
.align 32
-ENTRY(memcpy)
-#ifndef USE_BPR
+ENTRY(__memcpy_niagara2)
+# ifndef USE_BPR
srl %o2, 0, %o2
-#endif
+# endif
100: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
cmp %o2, 0
@@ -502,245 +487,6 @@ ENTRY(memcpy)
retl
mov %g5, %o0
-END(memcpy)
-
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stw %t0, [%dst - offset - 0x1c]; \
- srlx %t0, 32, %t0; \
- stw %t0, [%dst - offset - 0x20]; \
- stw %t1, [%dst - offset - 0x14]; \
- srlx %t1, 32, %t1; \
- stw %t1, [%dst - offset - 0x18]; \
- stw %t2, [%dst - offset - 0x0c]; \
- srlx %t2, 32, %t2; \
- stw %t2, [%dst - offset - 0x10]; \
- stw %t3, [%dst - offset - 0x04]; \
- srlx %t3, 32, %t3; \
- stw %t3, [%dst - offset - 0x08];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stx %t0, [%dst - offset - 0x20]; \
- stx %t1, [%dst - offset - 0x18]; \
- stx %t2, [%dst - offset - 0x10]; \
- stx %t3, [%dst - offset - 0x08]; \
- ldx [%src - offset - 0x40], %t0; \
- ldx [%src - offset - 0x38], %t1; \
- ldx [%src - offset - 0x30], %t2; \
- ldx [%src - offset - 0x28], %t3; \
- stx %t0, [%dst - offset - 0x40]; \
- stx %t1, [%dst - offset - 0x38]; \
- stx %t2, [%dst - offset - 0x30]; \
- stx %t3, [%dst - offset - 0x28];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stw %t0, [%dst + offset + 0x04]; \
- srlx %t0, 32, %t2; \
- stw %t2, [%dst + offset + 0x00]; \
- stw %t1, [%dst + offset + 0x0c]; \
- srlx %t1, 32, %t3; \
- stw %t3, [%dst + offset + 0x08];
-
-#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stx %t0, [%dst + offset + 0x00]; \
- stx %t1, [%dst + offset + 0x08];
+END(__memcpy_niagara2)
- .align 32
-228: andcc %o2, 1, %g0
- be,pt %icc, 2f+4
-1: ldub [%o1 - 1], %o5
- sub %o1, 1, %o1
- sub %o0, 1, %o0
- subcc %o2, 1, %o2
- be,pn %XCC, 229f
- stb %o5, [%o0]
-2: ldub [%o1 - 1], %o5
- sub %o0, 2, %o0
- ldub [%o1 - 2], %g5
- sub %o1, 2, %o1
- subcc %o2, 2, %o2
- stb %o5, [%o0 + 1]
- bne,pt %XCC, 2b
- stb %g5, [%o0]
-229: retl
- mov %g4, %o0
-out: retl
- mov %g5, %o0
-
- .align 32
-ENTRY(memmove)
- mov %o0, %g5
-#ifndef USE_BPR
- srl %o2, 0, %o2
-#endif
- brz,pn %o2, out
- sub %o0, %o1, %o4
- cmp %o4, %o2
- bgeu,pt %XCC, 218b
- mov %o0, %g4
- add %o0, %o2, %o0
-220: add %o1, %o2, %o1
- cmp %o2, 15
- bleu,pn %XCC, 228b
- andcc %o0, 7, %g2
- sub %o0, %o1, %g5
- andcc %g5, 3, %o5
- bne,pn %XCC, 232f
- andcc %o1, 3, %g0
- be,a,pt %XCC, 236f
- andcc %o1, 4, %g0
- andcc %o1, 1, %g0
- be,pn %XCC, 4f
- andcc %o1, 2, %g0
- ldub [%o1 - 1], %g2
- sub %o1, 1, %o1
- sub %o0, 1, %o0
- sub %o2, 1, %o2
- be,pn %XCC, 5f
- stb %g2, [%o0]
-4: lduh [%o1 - 2], %g2
- sub %o1, 2, %o1
- sub %o0, 2, %o0
- sub %o2, 2, %o2
- sth %g2, [%o0]
-5: andcc %o1, 4, %g0
-236: be,a,pn %XCC, 2f
- andcc %o2, -128, %g6
- lduw [%o1 - 4], %g5
- sub %o1, 4, %o1
- sub %o0, 4, %o0
- sub %o2, 4, %o2
- stw %g5, [%o0]
- andcc %o2, -128, %g6
-2: be,pn %XCC, 235f
- andcc %o0, 4, %g0
- be,pn %XCC, 282f + 4
-5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
- subcc %g6, 128, %g6
- sub %o1, 128, %o1
- bne,pt %XCC, 5b
- sub %o0, 128, %o0
-235: andcc %o2, 0x70, %g6
-41: be,pn %XCC, 280f
- andcc %o2, 8, %g0
-
-279: rd %pc, %o5
- sll %g6, 1, %g5
- sub %o1, %g6, %o1
- sub %o5, %g5, %o5
- jmpl %o5 + %lo(280f - 279b), %g0
- sub %o0, %g6, %o0
- RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
-280: be,pt %XCC, 281f
- andcc %o2, 4, %g0
- ldx [%o1 - 8], %g2
- sub %o0, 8, %o0
- stw %g2, [%o0 + 4]
- sub %o1, 8, %o1
- srlx %g2, 32, %g2
- stw %g2, [%o0]
-281: be,pt %XCC, 1f
- andcc %o2, 2, %g0
- lduw [%o1 - 4], %g2
- sub %o1, 4, %o1
- stw %g2, [%o0 - 4]
- sub %o0, 4, %o0
-1: be,pt %XCC, 1f
- andcc %o2, 1, %g0
- lduh [%o1 - 2], %g2
- sub %o1, 2, %o1
- sth %g2, [%o0 - 2]
- sub %o0, 2, %o0
-1: be,pt %XCC, 211f
- nop
- ldub [%o1 - 1], %g2
- stb %g2, [%o0 - 1]
-211: retl
- mov %g4, %o0
-
-282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- subcc %g6, 128, %g6
- sub %o1, 128, %o1
- bne,pt %XCC, 282b
- sub %o0, 128, %o0
- andcc %o2, 0x70, %g6
- be,pn %XCC, 284f
- andcc %o2, 8, %g0
-
-283: rd %pc, %o5
- sub %o1, %g6, %o1
- sub %o5, %g6, %o5
- jmpl %o5 + %lo(284f - 283b), %g0
- sub %o0, %g6, %o0
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
-284: be,pt %XCC, 285f
- andcc %o2, 4, %g0
- ldx [%o1 - 8], %g2
- sub %o0, 8, %o0
- sub %o1, 8, %o1
- stx %g2, [%o0]
-285: be,pt %XCC, 1f
- andcc %o2, 2, %g0
- lduw [%o1 - 4], %g2
- sub %o0, 4, %o0
- sub %o1, 4, %o1
- stw %g2, [%o0]
-1: be,pt %XCC, 1f
- andcc %o2, 1, %g0
- lduh [%o1 - 2], %g2
- sub %o0, 2, %o0
- sub %o1, 2, %o1
- sth %g2, [%o0]
-1: be,pt %XCC, 1f
- nop
- ldub [%o1 - 1], %g2
- stb %g2, [%o0 - 1]
-1: retl
- mov %g4, %o0
-
-232: ldub [%o1 - 1], %g5
- sub %o1, 1, %o1
- sub %o0, 1, %o0
- subcc %o2, 1, %o2
- bne,pt %XCC, 232b
- stb %g5, [%o0]
-234: retl
- mov %g4, %o0
-END(memmove)
-
-#ifdef USE_BPR
-weak_alias (memcpy, __align_cpy_1)
-weak_alias (memcpy, __align_cpy_2)
-weak_alias (memcpy, __align_cpy_4)
-weak_alias (memcpy, __align_cpy_8)
-weak_alias (memcpy, __align_cpy_16)
#endif
-libc_hidden_builtin_def (memcpy)
-libc_hidden_builtin_def (memmove)
diff --git a/libc/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/libc/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
new file mode 100644
index 000000000..34ca089f9
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
@@ -0,0 +1,320 @@
+/* Copy SIZE bytes from SRC to DEST.
+ For UltraSPARC-III.
+ Copyright (C) 2001, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by David S. Miller (davem@redhat.com)
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+#define ASI_BLK_P 0xf0
+#define FPRS_FEF 0x04
+#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
+#if !defined NOT_IN_libc
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+ .register %g6,#scratch
+
+ .text
+
+ /* Special/non-trivial issues of this code:
+ *
+ * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
+ * 2) Only low 32 FPU registers are used so that only the
+ * lower half of the FPU register set is dirtied by this
+ * code. This is especially important in the kernel.
+ * 3) This code never prefetches cachelines past the end
+ * of the source buffer.
+ *
+ * The cheetah's flexible spine, oversized liver, enlarged heart,
+ * slender muscular body, and claws make it the swiftest hunter
+ * in Africa and the fastest animal on land. Can reach speeds
+ * of up to 2.4GB per second.
+ */
+ .align 32
+ENTRY(__memcpy_ultra3)
+
+100: /* %o0=dst, %o1=src, %o2=len */
+ mov %o0, %g5
+ cmp %o2, 0
+ be,pn %XCC, out
+218: or %o0, %o1, %o3
+ cmp %o2, 16
+ bleu,a,pn %XCC, small_copy
+ or %o3, %o2, %o3
+
+ cmp %o2, 256
+ blu,pt %XCC, medium_copy
+ andcc %o3, 0x7, %g0
+
+ ba,pt %xcc, enter
+ andcc %o0, 0x3f, %g2
+
+ /* Here len >= 256 and condition codes reflect execution
+ * of "andcc %o0, 0x7, %g2", done by caller.
+ */
+ .align 64
+enter:
+ /* Is 'dst' already aligned on an 64-byte boundary? */
+ be,pt %XCC, 2f
+
+ /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
+ * of bytes to copy to make 'dst' 64-byte aligned. We pre-
+ * subtract this from 'len'.
+ */
+ sub %g2, 0x40, %g2
+ sub %g0, %g2, %g2
+ sub %o2, %g2, %o2
+
+ /* Copy %g2 bytes from src to dst, one byte at a time. */
+1: ldub [%o1 + 0x00], %o3
+ add %o1, 0x1, %o1
+ add %o0, 0x1, %o0
+ subcc %g2, 0x1, %g2
+
+ bg,pt %XCC, 1b
+ stb %o3, [%o0 + -1]
+
+2: VISEntryHalf
+ and %o1, 0x7, %g1
+ ba,pt %xcc, begin
+ alignaddr %o1, %g0, %o1
+
+ .align 64
+begin:
+ prefetch [%o1 + 0x000], #one_read
+ prefetch [%o1 + 0x040], #one_read
+ andn %o2, (0x40 - 1), %o4
+ prefetch [%o1 + 0x080], #one_read
+ prefetch [%o1 + 0x0c0], #one_read
+ ldd [%o1 + 0x000], %f0
+ prefetch [%o1 + 0x100], #one_read
+ ldd [%o1 + 0x008], %f2
+ prefetch [%o1 + 0x140], #one_read
+ ldd [%o1 + 0x010], %f4
+ prefetch [%o1 + 0x180], #one_read
+ faligndata %f0, %f2, %f16
+ ldd [%o1 + 0x018], %f6
+ faligndata %f2, %f4, %f18
+ ldd [%o1 + 0x020], %f8
+ faligndata %f4, %f6, %f20
+ ldd [%o1 + 0x028], %f10
+ faligndata %f6, %f8, %f22
+
+ ldd [%o1 + 0x030], %f12
+ faligndata %f8, %f10, %f24
+ ldd [%o1 + 0x038], %f14
+ faligndata %f10, %f12, %f26
+ ldd [%o1 + 0x040], %f0
+
+ sub %o4, 0x80, %o4
+ add %o1, 0x40, %o1
+ ba,pt %xcc, loop
+ srl %o4, 6, %o3
+
+ .align 64
+loop:
+ ldd [%o1 + 0x008], %f2
+ faligndata %f12, %f14, %f28
+ ldd [%o1 + 0x010], %f4
+ faligndata %f14, %f0, %f30
+ stda %f16, [%o0] ASI_BLK_P
+ ldd [%o1 + 0x018], %f6
+ faligndata %f0, %f2, %f16
+
+ ldd [%o1 + 0x020], %f8
+ faligndata %f2, %f4, %f18
+ ldd [%o1 + 0x028], %f10
+ faligndata %f4, %f6, %f20
+ ldd [%o1 + 0x030], %f12
+ faligndata %f6, %f8, %f22
+ ldd [%o1 + 0x038], %f14
+ faligndata %f8, %f10, %f24
+
+ ldd [%o1 + 0x040], %f0
+ prefetch [%o1 + 0x180], #one_read
+ faligndata %f10, %f12, %f26
+ subcc %o3, 0x01, %o3
+ add %o1, 0x40, %o1
+ bg,pt %XCC, loop
+ add %o0, 0x40, %o0
+
+ /* Finally we copy the last full 64-byte block. */
+loopfini:
+ ldd [%o1 + 0x008], %f2
+ faligndata %f12, %f14, %f28
+ ldd [%o1 + 0x010], %f4
+ faligndata %f14, %f0, %f30
+ stda %f16, [%o0] ASI_BLK_P
+ ldd [%o1 + 0x018], %f6
+ faligndata %f0, %f2, %f16
+ ldd [%o1 + 0x020], %f8
+ faligndata %f2, %f4, %f18
+ ldd [%o1 + 0x028], %f10
+ faligndata %f4, %f6, %f20
+ ldd [%o1 + 0x030], %f12
+ faligndata %f6, %f8, %f22
+ ldd [%o1 + 0x038], %f14
+ faligndata %f8, %f10, %f24
+ cmp %g1, 0
+ be,pt %XCC, 1f
+ add %o0, 0x40, %o0
+ ldd [%o1 + 0x040], %f0
+1: faligndata %f10, %f12, %f26
+ faligndata %f12, %f14, %f28
+ faligndata %f14, %f0, %f30
+ stda %f16, [%o0] ASI_BLK_P
+ add %o0, 0x40, %o0
+ add %o1, 0x40, %o1
+ membar #Sync
+
+ /* Now we copy the (len modulo 64) bytes at the end.
+ * Note how we borrow the %f0 loaded above.
+ *
+ * Also notice how this code is careful not to perform a
+ * load past the end of the src buffer.
+ */
+loopend:
+ and %o2, 0x3f, %o2
+ andcc %o2, 0x38, %g2
+ be,pn %XCC, endcruft
+ subcc %g2, 0x8, %g2
+ be,pn %XCC, endcruft
+ cmp %g1, 0
+
+ be,a,pt %XCC, 1f
+ ldd [%o1 + 0x00], %f0
+
+1: ldd [%o1 + 0x08], %f2
+ add %o1, 0x8, %o1
+ sub %o2, 0x8, %o2
+ subcc %g2, 0x8, %g2
+ faligndata %f0, %f2, %f8
+ std %f8, [%o0 + 0x00]
+ be,pn %XCC, endcruft
+ add %o0, 0x8, %o0
+ ldd [%o1 + 0x08], %f0
+ add %o1, 0x8, %o1
+ sub %o2, 0x8, %o2
+ subcc %g2, 0x8, %g2
+ faligndata %f2, %f0, %f8
+ std %f8, [%o0 + 0x00]
+ bne,pn %XCC, 1b
+ add %o0, 0x8, %o0
+
+ /* If anything is left, we copy it one byte at a time.
+ * Note that %g1 is (src & 0x3) saved above before the
+ * alignaddr was performed.
+ */
+endcruft:
+ cmp %o2, 0
+ add %o1, %g1, %o1
+ VISExitHalf
+ be,pn %XCC, out
+ sub %o0, %o1, %o3
+
+ andcc %g1, 0x7, %g0
+ bne,pn %icc, small_copy_unaligned
+ andcc %o2, 0x8, %g0
+ be,pt %icc, 1f
+ nop
+ ldx [%o1], %o5
+ stx %o5, [%o1 + %o3]
+ add %o1, 0x8, %o1
+
+1: andcc %o2, 0x4, %g0
+ be,pt %icc, 1f
+ nop
+ lduw [%o1], %o5
+ stw %o5, [%o1 + %o3]
+ add %o1, 0x4, %o1
+
+1: andcc %o2, 0x2, %g0
+ be,pt %icc, 1f
+ nop
+ lduh [%o1], %o5
+ sth %o5, [%o1 + %o3]
+ add %o1, 0x2, %o1
+
+1: andcc %o2, 0x1, %g0
+ be,pt %icc, out
+ nop
+ ldub [%o1], %o5
+ ba,pt %xcc, out
+ stb %o5, [%o1 + %o3]
+
+medium_copy: /* 16 < len <= 64 */
+ bne,pn %XCC, small_copy_unaligned
+ sub %o0, %o1, %o3
+
+medium_copy_aligned:
+ andn %o2, 0x7, %o4
+ and %o2, 0x7, %o2
+1: subcc %o4, 0x8, %o4
+ ldx [%o1], %o5
+ stx %o5, [%o1 + %o3]
+ bgu,pt %XCC, 1b
+ add %o1, 0x8, %o1
+ andcc %o2, 0x4, %g0
+ be,pt %XCC, 1f
+ nop
+ sub %o2, 0x4, %o2
+ lduw [%o1], %o5
+ stw %o5, [%o1 + %o3]
+ add %o1, 0x4, %o1
+1: cmp %o2, 0
+ be,pt %XCC, out
+ nop
+ ba,pt %xcc, small_copy_unaligned
+ nop
+
+small_copy: /* 0 < len <= 16 */
+ andcc %o3, 0x3, %g0
+ bne,pn %XCC, small_copy_unaligned
+ sub %o0, %o1, %o3
+
+small_copy_aligned:
+ subcc %o2, 4, %o2
+ lduw [%o1], %g1
+ stw %g1, [%o1 + %o3]
+ bgu,pt %XCC, small_copy_aligned
+ add %o1, 4, %o1
+
+out: retl
+ mov %g5, %o0
+
+ .align 32
+small_copy_unaligned:
+ subcc %o2, 1, %o2
+ ldub [%o1], %g1
+ stb %g1, [%o1 + %o3]
+ bgu,pt %XCC, small_copy_unaligned
+ add %o1, 1, %o1
+ retl
+ mov %g5, %o0
+
+END(__memcpy_ultra3)
+
+#endif \ No newline at end of file
diff --git a/libc/sysdeps/sparc/sparc64/multiarch/memcpy.S b/libc/sysdeps/sparc/sparc64/multiarch/memcpy.S
new file mode 100644
index 000000000..a708de10e
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/multiarch/memcpy.S
@@ -0,0 +1,107 @@
+/* Multiple versions of memcpy
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by David S. Miller (davem@davemloft.net)
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+
+#if !defined NOT_IN_libc
+ .text
+ENTRY(memcpy)
+ .type memcpy, @gnu_indirect_function
+# ifdef SHARED
+ mov %o7, %o5
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3
+ call 1f
+ or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3
+1: add %o7, %o3, %o3
+ mov %o5, %o7
+ sethi %hi(_rtld_global_ro), %o2
+ or %o2, %lo(_rtld_global_ro), %o2
+# ifdef __arch64__
+ ldx [%o3 + %o2], %o2
+ ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2
+# else
+ ld [%o3 + %o2], %o2
+ ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2
+# endif
+# else
+ set _dl_hwcap, %o3
+# ifdef __arch64__
+ ldx [%o3], %o2
+# else
+ ld [%o3 + 4], %o2
+# endif
+# endif
+ andcc %o2, 0x80, %g0 ! HWCAP_SPARC_N2
+ be 1f
+ andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT
+# ifdef SHARED
+ sethi %gdop_hix22(__memcpy_niagara2), %o1
+ xor %o1, %gdop_lox10(__memcpy_niagara2), %o1
+# else
+ set __memcpy_niagara2, %o1
+# endif
+ ba 10f
+ nop
+1: be 1f
+ andcc %o2, 0x20, %g0 ! HWCAP_SPARC_ULTRA3
+# ifdef SHARED
+ sethi %gdop_hix22(__memcpy_niagara1), %o1
+ xor %o1, %gdop_lox10(__memcpy_niagara1), %o1
+# else
+ set __memcpy_niagara1, %o1
+# endif
+ ba 10f
+ nop
+1: be 9f
+ nop
+# ifdef SHARED
+ sethi %gdop_hix22(__memcpy_ultra3), %o1
+ xor %o1, %gdop_lox10(__memcpy_ultra3), %o1
+# else
+ set __memcpy_ultra3, %o1
+# endif
+ ba 10f
+ nop
+9:
+# ifdef SHARED
+ sethi %gdop_hix22(__memcpy_ultra1), %o1
+ xor %o1, %gdop_lox10(__memcpy_ultra1), %o1
+# else
+ set __memcpy_ultra1, %o1
+# endif
+10:
+# ifdef SHARED
+ add %o3, %o1, %o1
+# endif
+ retl
+ mov %o1, %o0
+END(memcpy)
+
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in a shared library. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memcpy; __GI_memcpy = __memcpy_ultra1
+
+#define memcpy __memcpy_ultra1
+
+#endif
+
+#include "../memcpy.S"
diff --git a/libc/sysdeps/sparc/sparc64/sparcv9v/memset.S b/libc/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S
index 64817b887..20ea05621 100644
--- a/libc/sysdeps/sparc/sparc64/sparcv9v/memset.S
+++ b/libc/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S
@@ -29,12 +29,14 @@
#define XCC xcc
#endif
+#if !defined NOT_IN_libc
+
.register %g2,#scratch
.text
.align 32
-ENTRY(memset)
+ENTRY(__memset_niagara1)
/* %o0=buf, %o1=pat, %o2=len */
and %o1, 0xff, %o3
mov %o2, %o1
@@ -45,14 +47,14 @@ ENTRY(memset)
sllx %o2, 32, %g1
ba,pt %XCC, 1f
or %g1, %o2, %o2
-END(memset)
+END(__memset_niagara1)
-ENTRY(__bzero)
+ENTRY(__bzero_niagara1)
clr %o2
1:
-#ifndef USE_BRP
+# ifndef USE_BRP
srl %o1, 0, %o1
-#endif
+# endif
brz,pn %o1, 90f
mov %o0, %o3
@@ -125,7 +127,6 @@ ENTRY(__bzero)
90:
retl
mov %o3, %o0
-END(__bzero)
+END(__bzero_niagara1)
-libc_hidden_builtin_def (memset)
-weak_alias (__bzero, bzero)
+#endif
diff --git a/libc/sysdeps/sparc/sparc64/multiarch/memset.S b/libc/sysdeps/sparc/sparc64/multiarch/memset.S
new file mode 100644
index 000000000..23e513f18
--- /dev/null
+++ b/libc/sysdeps/sparc/sparc64/multiarch/memset.S
@@ -0,0 +1,145 @@
+/* Multiple versions of memset and bzero
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by David S. Miller (davem@davemloft.net)
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+
+#if !defined NOT_IN_libc
+ .text
+ENTRY(memset)
+ .type memset, @gnu_indirect_function
+# ifdef SHARED
+ mov %o7, %o5
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3
+ call 1f
+ or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3
+1: add %o7, %o3, %o3
+ mov %o5, %o7
+ sethi %hi(_rtld_global_ro), %o2
+ or %o2, %lo(_rtld_global_ro), %o2
+# ifdef __arch64__
+ ldx [%o3 + %o2], %o2
+ ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2
+# else
+ ld [%o3 + %o2], %o2
+ ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2
+# endif
+# else
+ set _dl_hwcap, %o3
+# ifdef __arch64__
+ ldx [%o3], %o2
+# else
+ ld [%o3 + 4], %o2
+# endif
+# endif
+ andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT
+ be 9f
+ nop
+# ifdef SHARED
+ sethi %gdop_hix22(__memset_niagara1), %o1
+ xor %o1, %gdop_lox10(__memset_niagara1), %o1
+# else
+ set __memset_niagara1, %o1
+# endif
+ ba 10f
+ nop
+9:
+# ifdef SHARED
+ sethi %gdop_hix22(__memset_ultra1), %o1
+ xor %o1, %gdop_lox10(__memset_ultra1), %o1
+# else
+ set __memset_ultra1, %o1
+# endif
+10:
+# ifdef SHARED
+ add %o3, %o1, %o1
+# endif
+ retl
+ mov %o1, %o0
+END(memset)
+
+ENTRY(__bzero)
+ .type bzero, @gnu_indirect_function
+# ifdef SHARED
+ mov %o7, %o5
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3
+ call 1f
+ or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3
+1: add %o7, %o3, %o3
+ mov %o5, %o7
+ sethi %hi(_rtld_global_ro), %o2
+ or %o2, %lo(_rtld_global_ro), %o2
+# ifdef __arch64__
+ ldx [%o3 + %o2], %o2
+ ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2
+# else
+ ld [%o3 + %o2], %o2
+ ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2
+# endif
+# else
+ set _dl_hwcap, %o3
+# ifdef __arch64__
+ ldx [%o3], %o2
+# else
+ ld [%o3 + 4], %o2
+# endif
+# endif
+ andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT
+ be 9f
+ nop
+# ifdef SHARED
+ sethi %gdop_hix22(__bzero_niagara1), %o1
+ xor %o1, %gdop_lox10(__bzero_niagara1), %o1
+# else
+ set __bzero_niagara1, %o1
+# endif
+ ba 10f
+ nop
+9:
+# ifdef SHARED
+ sethi %gdop_hix22(__memset_ultra1), %o1
+ xor %o1, %gdop_lox10(__memset_ultra1), %o1
+# else
+ set __bzero_ultra1, %o1
+# endif
+10:
+# ifdef SHARED
+ add %o3, %o1, %o1
+# endif
+ retl
+ mov %o1, %o0
+END(__bzero)
+
+weak_alias (__bzero, bzero)
+
+# undef weak_alias
+# define weak_alias(a, b)
+
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in a shared library. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_memset; __GI_memset = __memset_ultra1
+
+#define memset __memset_ultra1
+#define __bzero __bzero_ultra1
+
+#endif
+
+#include "../memset.S"
diff --git a/libc/sysdeps/sparc/sparc64/sparcv9b/memcpy.S b/libc/sysdeps/sparc/sparc64/sparcv9b/memcpy.S
deleted file mode 100644
index 760d52663..000000000
--- a/libc/sysdeps/sparc/sparc64/sparcv9b/memcpy.S
+++ /dev/null
@@ -1,610 +0,0 @@
-/* Copy SIZE bytes from SRC to DEST.
- For UltraSPARC-III.
- Copyright (C) 2001, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David S. Miller (davem@redhat.com)
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <sysdep.h>
-
-#define ASI_BLK_P 0xf0
-#define FPRS_FEF 0x04
-#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
-#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
-
-#ifndef XCC
-#define USE_BPR
-#define XCC xcc
-#endif
-
- .register %g2,#scratch
- .register %g3,#scratch
- .register %g6,#scratch
-
- .text
- .align 32
-
-ENTRY(bcopy)
- sub %o1, %o0, %o4
- mov %o0, %g4
- cmp %o4, %o2
- mov %o1, %o0
- bgeu,pt %XCC, 100f
- mov %g4, %o1
-#ifndef USE_BPR
- srl %o2, 0, %o2
-#endif
- brnz,pn %o2, 220f
- add %o0, %o2, %o0
- retl
- nop
-END(bcopy)
-
- /* Special/non-trivial issues of this code:
- *
- * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
- * 2) Only low 32 FPU registers are used so that only the
- * lower half of the FPU register set is dirtied by this
- * code. This is especially important in the kernel.
- * 3) This code never prefetches cachelines past the end
- * of the source buffer.
- *
- * The cheetah's flexible spine, oversized liver, enlarged heart,
- * slender muscular body, and claws make it the swiftest hunter
- * in Africa and the fastest animal on land. Can reach speeds
- * of up to 2.4GB per second.
- */
- .align 32
-ENTRY(memcpy)
-
-100: /* %o0=dst, %o1=src, %o2=len */
- mov %o0, %g5
- cmp %o2, 0
- be,pn %XCC, out
-218: or %o0, %o1, %o3
- cmp %o2, 16
- bleu,a,pn %XCC, small_copy
- or %o3, %o2, %o3
-
- cmp %o2, 256
- blu,pt %XCC, medium_copy
- andcc %o3, 0x7, %g0
-
- ba,pt %xcc, enter
- andcc %o0, 0x3f, %g2
-
- /* Here len >= 256 and condition codes reflect execution
- * of "andcc %o0, 0x7, %g2", done by caller.
- */
- .align 64
-enter:
- /* Is 'dst' already aligned on an 64-byte boundary? */
- be,pt %XCC, 2f
-
- /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
- * of bytes to copy to make 'dst' 64-byte aligned. We pre-
- * subtract this from 'len'.
- */
- sub %g2, 0x40, %g2
- sub %g0, %g2, %g2
- sub %o2, %g2, %o2
-
- /* Copy %g2 bytes from src to dst, one byte at a time. */
-1: ldub [%o1 + 0x00], %o3
- add %o1, 0x1, %o1
- add %o0, 0x1, %o0
- subcc %g2, 0x1, %g2
-
- bg,pt %XCC, 1b
- stb %o3, [%o0 + -1]
-
-2: VISEntryHalf
- and %o1, 0x7, %g1
- ba,pt %xcc, begin
- alignaddr %o1, %g0, %o1
-
- .align 64
-begin:
- prefetch [%o1 + 0x000], #one_read
- prefetch [%o1 + 0x040], #one_read
- andn %o2, (0x40 - 1), %o4
- prefetch [%o1 + 0x080], #one_read
- prefetch [%o1 + 0x0c0], #one_read
- ldd [%o1 + 0x000], %f0
- prefetch [%o1 + 0x100], #one_read
- ldd [%o1 + 0x008], %f2
- prefetch [%o1 + 0x140], #one_read
- ldd [%o1 + 0x010], %f4
- prefetch [%o1 + 0x180], #one_read
- faligndata %f0, %f2, %f16
- ldd [%o1 + 0x018], %f6
- faligndata %f2, %f4, %f18
- ldd [%o1 + 0x020], %f8
- faligndata %f4, %f6, %f20
- ldd [%o1 + 0x028], %f10
- faligndata %f6, %f8, %f22
-
- ldd [%o1 + 0x030], %f12
- faligndata %f8, %f10, %f24
- ldd [%o1 + 0x038], %f14
- faligndata %f10, %f12, %f26
- ldd [%o1 + 0x040], %f0
-
- sub %o4, 0x80, %o4
- add %o1, 0x40, %o1
- ba,pt %xcc, loop
- srl %o4, 6, %o3
-
- .align 64
-loop:
- ldd [%o1 + 0x008], %f2
- faligndata %f12, %f14, %f28
- ldd [%o1 + 0x010], %f4
- faligndata %f14, %f0, %f30
- stda %f16, [%o0] ASI_BLK_P
- ldd [%o1 + 0x018], %f6
- faligndata %f0, %f2, %f16
-
- ldd [%o1 + 0x020], %f8
- faligndata %f2, %f4, %f18
- ldd [%o1 + 0x028], %f10
- faligndata %f4, %f6, %f20
- ldd [%o1 + 0x030], %f12
- faligndata %f6, %f8, %f22
- ldd [%o1 + 0x038], %f14
- faligndata %f8, %f10, %f24
-
- ldd [%o1 + 0x040], %f0
- prefetch [%o1 + 0x180], #one_read
- faligndata %f10, %f12, %f26
- subcc %o3, 0x01, %o3
- add %o1, 0x40, %o1
- bg,pt %XCC, loop
- add %o0, 0x40, %o0
-
- /* Finally we copy the last full 64-byte block. */
-loopfini:
- ldd [%o1 + 0x008], %f2
- faligndata %f12, %f14, %f28
- ldd [%o1 + 0x010], %f4
- faligndata %f14, %f0, %f30
- stda %f16, [%o0] ASI_BLK_P
- ldd [%o1 + 0x018], %f6
- faligndata %f0, %f2, %f16
- ldd [%o1 + 0x020], %f8
- faligndata %f2, %f4, %f18
- ldd [%o1 + 0x028], %f10
- faligndata %f4, %f6, %f20
- ldd [%o1 + 0x030], %f12
- faligndata %f6, %f8, %f22
- ldd [%o1 + 0x038], %f14
- faligndata %f8, %f10, %f24
- cmp %g1, 0
- be,pt %XCC, 1f
- add %o0, 0x40, %o0
- ldd [%o1 + 0x040], %f0
-1: faligndata %f10, %f12, %f26
- faligndata %f12, %f14, %f28
- faligndata %f14, %f0, %f30
- stda %f16, [%o0] ASI_BLK_P
- add %o0, 0x40, %o0
- add %o1, 0x40, %o1
- membar #Sync
-
- /* Now we copy the (len modulo 64) bytes at the end.
- * Note how we borrow the %f0 loaded above.
- *
- * Also notice how this code is careful not to perform a
- * load past the end of the src buffer.
- */
-loopend:
- and %o2, 0x3f, %o2
- andcc %o2, 0x38, %g2
- be,pn %XCC, endcruft
- subcc %g2, 0x8, %g2
- be,pn %XCC, endcruft
- cmp %g1, 0
-
- be,a,pt %XCC, 1f
- ldd [%o1 + 0x00], %f0
-
-1: ldd [%o1 + 0x08], %f2
- add %o1, 0x8, %o1
- sub %o2, 0x8, %o2
- subcc %g2, 0x8, %g2
- faligndata %f0, %f2, %f8
- std %f8, [%o0 + 0x00]
- be,pn %XCC, endcruft
- add %o0, 0x8, %o0
- ldd [%o1 + 0x08], %f0
- add %o1, 0x8, %o1
- sub %o2, 0x8, %o2
- subcc %g2, 0x8, %g2
- faligndata %f2, %f0, %f8
- std %f8, [%o0 + 0x00]
- bne,pn %XCC, 1b
- add %o0, 0x8, %o0
-
- /* If anything is left, we copy it one byte at a time.
- * Note that %g1 is (src & 0x3) saved above before the
- * alignaddr was performed.
- */
-endcruft:
- cmp %o2, 0
- add %o1, %g1, %o1
- VISExitHalf
- be,pn %XCC, out
- sub %o0, %o1, %o3
-
- andcc %g1, 0x7, %g0
- bne,pn %icc, small_copy_unaligned
- andcc %o2, 0x8, %g0
- be,pt %icc, 1f
- nop
- ldx [%o1], %o5
- stx %o5, [%o1 + %o3]
- add %o1, 0x8, %o1
-
-1: andcc %o2, 0x4, %g0
- be,pt %icc, 1f
- nop
- lduw [%o1], %o5
- stw %o5, [%o1 + %o3]
- add %o1, 0x4, %o1
-
-1: andcc %o2, 0x2, %g0
- be,pt %icc, 1f
- nop
- lduh [%o1], %o5
- sth %o5, [%o1 + %o3]
- add %o1, 0x2, %o1
-
-1: andcc %o2, 0x1, %g0
- be,pt %icc, out
- nop
- ldub [%o1], %o5
- ba,pt %xcc, out
- stb %o5, [%o1 + %o3]
-
-medium_copy: /* 16 < len <= 64 */
- bne,pn %XCC, small_copy_unaligned
- sub %o0, %o1, %o3
-
-medium_copy_aligned:
- andn %o2, 0x7, %o4
- and %o2, 0x7, %o2
-1: subcc %o4, 0x8, %o4
- ldx [%o1], %o5
- stx %o5, [%o1 + %o3]
- bgu,pt %XCC, 1b
- add %o1, 0x8, %o1
- andcc %o2, 0x4, %g0
- be,pt %XCC, 1f
- nop
- sub %o2, 0x4, %o2
- lduw [%o1], %o5
- stw %o5, [%o1 + %o3]
- add %o1, 0x4, %o1
-1: cmp %o2, 0
- be,pt %XCC, out
- nop
- ba,pt %xcc, small_copy_unaligned
- nop
-
-small_copy: /* 0 < len <= 16 */
- andcc %o3, 0x3, %g0
- bne,pn %XCC, small_copy_unaligned
- sub %o0, %o1, %o3
-
-small_copy_aligned:
- subcc %o2, 4, %o2
- lduw [%o1], %g1
- stw %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_aligned
- add %o1, 4, %o1
-
-out: retl
- mov %g5, %o0
-
- .align 32
-small_copy_unaligned:
- subcc %o2, 1, %o2
- ldub [%o1], %g1
- stb %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_unaligned
- add %o1, 1, %o1
- retl
- mov %g5, %o0
-
-END(memcpy)
-
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stw %t0, [%dst - offset - 0x1c]; \
- srlx %t0, 32, %t0; \
- stw %t0, [%dst - offset - 0x20]; \
- stw %t1, [%dst - offset - 0x14]; \
- srlx %t1, 32, %t1; \
- stw %t1, [%dst - offset - 0x18]; \
- stw %t2, [%dst - offset - 0x0c]; \
- srlx %t2, 32, %t2; \
- stw %t2, [%dst - offset - 0x10]; \
- stw %t3, [%dst - offset - 0x04]; \
- srlx %t3, 32, %t3; \
- stw %t3, [%dst - offset - 0x08];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stx %t0, [%dst - offset - 0x20]; \
- stx %t1, [%dst - offset - 0x18]; \
- stx %t2, [%dst - offset - 0x10]; \
- stx %t3, [%dst - offset - 0x08]; \
- ldx [%src - offset - 0x40], %t0; \
- ldx [%src - offset - 0x38], %t1; \
- ldx [%src - offset - 0x30], %t2; \
- ldx [%src - offset - 0x28], %t3; \
- stx %t0, [%dst - offset - 0x40]; \
- stx %t1, [%dst - offset - 0x38]; \
- stx %t2, [%dst - offset - 0x30]; \
- stx %t3, [%dst - offset - 0x28];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stw %t0, [%dst + offset + 0x04]; \
- srlx %t0, 32, %t2; \
- stw %t2, [%dst + offset + 0x00]; \
- stw %t1, [%dst + offset + 0x0c]; \
- srlx %t1, 32, %t3; \
- stw %t3, [%dst + offset + 0x08];
-
-#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stx %t0, [%dst + offset + 0x00]; \
- stx %t1, [%dst + offset + 0x08];
-
- .align 32
-228: andcc %o2, 1, %g0 /* IEU1 Group */
- be,pt %icc, 2f+4 /* CTI */
-1: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 Group */
- be,pn %xcc, 229f /* CTI */
- stb %o5, [%o0] /* Store */
-2: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o0, 2, %o0 /* IEU0 */
- ldub [%o1 - 2], %g5 /* LOAD Group */
- sub %o1, 2, %o1 /* IEU0 */
- subcc %o2, 2, %o2 /* IEU1 Group */
- stb %o5, [%o0 + 1] /* Store */
- bne,pt %xcc, 2b /* CTI */
- stb %g5, [%o0] /* Store */
-229: retl
- mov %g4, %o0
-
- .align 32
-ENTRY(memmove)
- mov %o0, %g5
-#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 Group */
-#endif
- brz,pn %o2, out /* CTI Group */
- sub %o0, %o1, %o4 /* IEU0 */
- cmp %o4, %o2 /* IEU1 Group */
- bgeu,pt %XCC, 218b /* CTI */
- mov %o0, %g4 /* IEU0 */
- add %o0, %o2, %o0 /* IEU0 Group */
-220: add %o1, %o2, %o1 /* IEU1 */
- cmp %o2, 15 /* IEU1 Group */
- bleu,pn %xcc, 228b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
- sub %o0, %o1, %g5 /* IEU0 */
- andcc %g5, 3, %o5 /* IEU1 Group */
- bne,pn %xcc, 232f /* CTI */
- andcc %o1, 3, %g0 /* IEU1 Group */
- be,a,pt %xcc, 236f /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
- andcc %o1, 1, %g0 /* IEU1 Group */
- be,pn %xcc, 4f /* CTI */
- andcc %o1, 2, %g0 /* IEU1 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- sub %o2, 1, %o2 /* IEU0 Group */
- be,pn %xcc, 5f /* CTI Group */
- stb %g2, [%o0] /* Store */
-4: lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sub %o0, 2, %o0 /* IEU1 */
- sub %o2, 2, %o2 /* IEU0 */
- sth %g2, [%o0] /* Store Group + bubble */
-5: andcc %o1, 4, %g0 /* IEU1 */
-236: be,a,pn %xcc, 2f /* CTI */
- andcc %o2, -128, %g6 /* IEU1 Group */
- lduw [%o1 - 4], %g5 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- sub %o0, 4, %o0 /* IEU1 */
- sub %o2, 4, %o2 /* IEU0 Group */
- stw %g5, [%o0] /* Store */
- andcc %o2, -128, %g6 /* IEU1 Group */
-2: be,pn %xcc, 235f /* CTI */
- andcc %o0, 4, %g0 /* IEU1 Group */
- be,pn %xcc, 282f + 4 /* CTI Group */
-5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 5b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
-235: andcc %o2, 0x70, %g6 /* IEU1 Group */
-41: be,pn %xcc, 280f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-279: rd %pc, %o5 /* PDU Group */
- sll %g6, 1, %g5 /* IEU0 Group */
- sub %o1, %g6, %o1 /* IEU1 */
- sub %o5, %g5, %o5 /* IEU0 Group */
- jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
-280: be,pt %xcc, 281f /* CTI */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- stw %g2, [%o0 + 4] /* Store Group */
- sub %o1, 8, %o1 /* IEU1 */
- srlx %g2, 32, %g2 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-281: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- stw %g2, [%o0 - 4] /* Store Group */
- sub %o0, 4, %o0 /* IEU0 */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sth %g2, [%o0 - 2] /* Store Group */
- sub %o0, 2, %o0 /* IEU0 */
-1: be,pt %xcc, 211f /* CTI */
- nop /* IEU1 */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-211: retl
- mov %g4, %o0
-
-282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 282b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
- be,pn %xcc, 284f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-283: rd %pc, %o5 /* PDU Group */
- sub %o1, %g6, %o1 /* IEU0 Group */
- sub %o5, %g6, %o5 /* IEU1 */
- jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
-284: be,pt %xcc, 285f /* CTI Group */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- sub %o1, 8, %o1 /* IEU0 Group */
- stx %g2, [%o0] /* Store */
-285: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o0, 4, %o0 /* IEU0 */
- sub %o1, 4, %o1 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o0, 2, %o0 /* IEU0 */
- sub %o1, 2, %o1 /* IEU0 Group */
- sth %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- nop /* IEU0 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-1: retl
- mov %g4, %o0
-
-232: brz,pt %g2, 2f /* CTI Group */
- sub %o2, %g2, %o2 /* IEU0 Group */
-1: ldub [%o1 - 1], %g5 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %g2, 1, %g2 /* IEU1 Group */
- bne,pt %xcc, 1b /* CTI */
- stb %g5, [%o0] /* Store */
-2: andn %o2, 7, %g5 /* IEU0 Group */
- and %o2, 7, %o2 /* IEU1 */
- fmovd %f0, %f2 /* FPU */
- alignaddr %o1, %g0, %g1 /* GRU Group */
- ldd [%g1], %f4 /* Load Group */
-1: ldd [%g1 - 8], %f6 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 Group */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f6, %f4, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 Group */
- be,pn %xcc, 233f /* CTI */
- sub %o0, 8, %o0 /* IEU1 */
- ldd [%g1 - 8], %f4 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f4, %f6, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 */
- bne,pn %xcc, 1b /* CTI Group */
- sub %o0, 8, %o0 /* IEU0 */
-233: brz,pn %o2, 234f /* CTI Group */
- nop /* IEU0 */
-237: ldub [%o1 - 1], %g5 /* LOAD */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 */
- bne,pt %xcc, 237b /* CTI */
- stb %g5, [%o0] /* Store Group */
-234: wr %g0, FPRS_FEF, %fprs
- retl
- mov %g4, %o0
-END(memmove)
-
-#ifdef USE_BPR
-weak_alias (memcpy, __align_cpy_1)
-weak_alias (memcpy, __align_cpy_2)
-weak_alias (memcpy, __align_cpy_4)
-weak_alias (memcpy, __align_cpy_8)
-weak_alias (memcpy, __align_cpy_16)
-#endif
-libc_hidden_builtin_def (memcpy)
-libc_hidden_builtin_def (memmove)
diff --git a/libc/sysdeps/sparc/sparc64/sparcv9v2/memset.S b/libc/sysdeps/sparc/sparc64/sparcv9v2/memset.S
deleted file mode 100644
index 809d3ed9c..000000000
--- a/libc/sysdeps/sparc/sparc64/sparcv9v2/memset.S
+++ /dev/null
@@ -1 +0,0 @@
-#include <sparc64/sparcv9v/memset.S>
diff --git a/libc/sysdeps/sparc/sparc64/strlen.S b/libc/sysdeps/sparc/sparc64/strlen.S
index cc15e4e3f..64350fb05 100644
--- a/libc/sysdeps/sparc/sparc64/strlen.S
+++ b/libc/sysdeps/sparc/sparc64/strlen.S
@@ -1,8 +1,9 @@
/* Determine the length of a string. For SPARC v9.
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1998, 1999, 2003, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
+ Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>,
+ Jakub Jelinek <jj@ultra.linux.cz>, and
+ David S. Miller <davem@davemloft.net>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -20,155 +21,66 @@
02111-1307 USA. */
#include <sysdep.h>
-#include <asm/asi.h>
-
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
+
+ .register %g2, #scratch
+ .register %g3, #scratch
.text
.align 32
ENTRY(strlen)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- ldub [%o0], %o3 /* Load */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
- mov %o0, %o1 /* IEU1 */
-
- sllx %g1, 32, %g4 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
- or %g1, %g4, %g1 /* IEU0 Group */
- brz,pn %o3, 13f /* CTI+IEU1 */
-
- sllx %g1, 7, %g4 /* IEU0 Group */
- bne,a,pn %icc, 15f /* CTI */
- add %o0, 1, %o0 /* IEU1 */
- /* %g1 = 0x0101010101010101 *
- * %g4 = 0x8080808080808080 *
- * %o0 = string pointer *
- * %o1 = start of string */
-1: ldx [%o0], %o3 /* Load Group */
-
- add %o0, 8, %o0 /* IEU1 */
-2: sub %o3, %g1, %o2 /* IEU0 Group */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %o3, %o5 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o5, %g4, %g0 /* IEU1 Group */
-#else
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g4, %g0 /* IEU1 Group */
-#endif
-
- be,pt %xcc, 2b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g5 /* IEU1 Group */
-#ifdef EIGHTBIT_NOT_RARE
- srlx %o5, 32, %o5 /* IEU0 */
-
-3: andcc %o5, %g4, %g0 /* IEU1 Group */
-#else
- srlx %o2, 32, %o2 /* IEU0 */
-
-3: andcc %o2, %g4, %g0 /* IEU1 Group */
-#endif
- be,pn %xcc, 4f /* CTI */
- srlx %g5, 56, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 12f /* CTI */
- srlx %g5, 48, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 11f /* CTI */
-
- srlx %g5, 40, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 10f /* CTI */
- srlx %g5, 32, %o2 /* IEU0 */
-
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 9f /* CTI */
-4: srlx %g5, 24, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 8f /* CTI */
- srlx %g5, 16, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
-
- srlx %g5, 8, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 6f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g4, %g0 /* IEU1 Group */
-
- be,pt %xcc, 2b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g5 /* IEU1 Group */
- ba,pt %xcc, 3b /* CTI */
-
- srlx %o2, 32, %o2 /* IEU0 */
-5: add %o0, -9, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-
-6: add %o0, -10, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-7: add %o0, -11, %o0 /* IEU0 Group */
-
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-8: add %o0, -12, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
-
- sub %o0, %o1, %o0 /* IEU0 */
-9: add %o0, -13, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-
-10: add %o0, -14, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-11: add %o0, -15, %o0 /* IEU0 Group */
-
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-12: add %o0, -16, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
-
- sub %o0, %o1, %o0 /* IEU0 */
-13: retl /* CTI+IEU1 Group */
- mov 0, %o0 /* IEU0 */
- nop
-
-15: ldub [%o0], %o3 /* Load Group */
-16: andcc %o0, 7, %g0 /* IEU1 */
- be,pn %icc, 1b /* CTI */
- nop /* IEU0 Group */
-
- add %o0, 1, %o0 /* IEU1 */
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- bne,a,pt %icc, 16b /* CTI */
- lduba [%o0] ASI_PNF, %o3 /* Load */
-
- add %o0, -1, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
+ mov %o0, %o1
+ andn %o0, 0x7, %o0
+
+ ldx [%o0], %o5
+ and %o1, 0x7, %g1
+ mov -1, %g5
+
+ sethi %hi(0x01010101), %o2
+ sll %g1, 3, %g1
+
+ or %o2, %lo(0x01010101), %o2
+ srlx %g5, %g1, %o3
+
+ sllx %o2, 32, %g1
+ sethi %hi(0x0000ff00), %g5
+
+ orn %o5, %o3, %o5
+ or %o2, %g1, %o2
+
+ sllx %o2, 7, %o3
+10: add %o0, 8, %o0
+
+ andn %o3, %o5, %g1
+ sub %o5, %o2, %g2
+
+ andcc %g1, %g2, %g0
+ be,a,pt %xcc, 10b
+ ldx [%o0], %o5
+ srlx %o5, 32, %g1
+
+ andn %o3, %g1, %o4
+ sub %g1, %o2, %g2
+
+ add %o0, 4, %g3
+ andcc %o4, %g2, %g0
+ movne %icc, %g1, %o5
+
+ move %icc, %g3, %o0
+ or %g5, %lo(0x0000ff00), %g5
+ mov 3 - 8, %g2
+
+ andcc %o5, %g5, %g0
+ srlx %o5, 16, %g1
+ move %icc, 2 - 8, %g2
+
+ andcc %g1, 0xff, %g0
+ srl %o5, 24, %o5
+ move %icc, 1 - 8, %g2
+
+ movrz %o5, 0 - 8, %g2
+ sub %o0, %o1, %o0
+
+ retl
+ add %o0, %g2, %o0
END(strlen)
libc_hidden_builtin_def (strlen)
diff --git a/libc/sysdeps/unix/sysv/linux/bits/in.h b/libc/sysdeps/unix/sysv/linux/bits/in.h
index b457a1790..0aa0d6638 100644
--- a/libc/sysdeps/unix/sysv/linux/bits/in.h
+++ b/libc/sysdeps/unix/sysv/linux/bits/in.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991-1999, 2000, 2004, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1991-1999, 2000, 2004, 2008, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -56,14 +56,26 @@
# define MCAST_INCLUDE 1
#endif
-#define IP_ROUTER_ALERT 5 /* bool */
-#define IP_PKTINFO 8 /* bool */
-#define IP_PKTOPTIONS 9
-#define IP_PMTUDISC 10 /* obsolete name? */
-#define IP_MTU_DISCOVER 10 /* int; see below */
-#define IP_RECVERR 11 /* bool */
-#define IP_RECVTTL 12 /* bool */
-#define IP_RECVTOS 13 /* bool */
+#define IP_ROUTER_ALERT 5 /* bool */
+#define IP_PKTINFO 8 /* bool */
+#define IP_PKTOPTIONS 9
+#define IP_PMTUDISC 10 /* obsolete name? */
+#define IP_MTU_DISCOVER 10 /* int; see below */
+#define IP_RECVERR 11 /* bool */
+#define IP_RECVTTL 12 /* bool */
+#define IP_RECVTOS 13 /* bool */
+#define IP_MTU 14 /* int */
+#define IP_FREEBIND 15
+#define IP_IPSEC_POLICY 16
+#define IP_XFRM_POLICY 17
+#define IP_PASSSEC 18
+#define IP_TRANSPARENT 19
+
+/* TProxy original addresses */
+#define IP_ORIGDSTADDR 20
+#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR
+
+#define IP_MINTTL 21
/* IP_MTU_DISCOVER arguments. */
diff --git a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c
index 28c1cb691..59b173ed7 100644
--- a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c
+++ b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2006, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
@@ -45,13 +45,15 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st)
const char *fsname = NULL;
const char *fsname2 = NULL;
+ const char *fsname3 = NULL;
/* Map the filesystem type we got from the statfs call to a string. */
switch (fstype)
{
case EXT2_SUPER_MAGIC:
- fsname = "ext3";
- fsname2 = "ext2";
+ fsname = "ext4";
+ fsname2 = "ext3";
+ fsname3 = "ext2";
break;
case DEVPTS_SUPER_MAGIC:
fsname= "devpts";
@@ -98,6 +100,9 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st)
case NTFS_SUPER_MAGIC:
fsname = "ntfs";
break;
+ case LOGFS_MAGIC_U32:
+ fsname = "logfs";
+ break;
}
FILE *mtab = __setmntent ("/proc/mounts", "r");
@@ -126,7 +131,9 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st)
else if (fsname != NULL
&& strcmp (fsname, mntbuf.mnt_type) != 0
&& (fsname2 == NULL
- || strcmp (fsname2, mntbuf.mnt_type) != 0))
+ || strcmp (fsname2, mntbuf.mnt_type) != 0)
+ && (fsname3 == NULL
+ || strcmp (fsname3, mntbuf.mnt_type) != 0))
continue;
/* Find out about the device the current entry is for. */
@@ -176,7 +183,7 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st)
{
/* Try without a filesystem name. */
assert (fsname != NULL);
- fsname = fsname2 = NULL;
+ fsname = fsname2 = fsname3 = NULL;
}
/* It is not strictly allowed to use rewind here. But
diff --git a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h b/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h
index 8c6591ada..b10e98b46 100644
--- a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h
+++ b/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h
@@ -1,5 +1,5 @@
/* Constants from kernel header for various FSes.
- Copyright (C) 1998,1999,2000,2001,2002,2003,2005 Free Software Foundation, Inc.
+ Copyright (C) 1998-2003,2005,2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -25,22 +25,22 @@
filesystem types will become available we have to add the
appropriate definitions here.*/
-/* Constants that identify the `adfs' filesystem. */
+/* Constant that identifies the `adfs' filesystem. */
#define ADFS_SUPER_MAGIC 0xadf5
-/* Constants that identify the `affs' filesystem. */
+/* Constant that identifies the `affs' filesystem. */
#define AFFS_SUPER_MAGIC 0xadff
-/* Constants that identify the `autofs' filesystem. */
+/* Constant that identifies the `autofs' filesystem. */
#define AUTOFS_SUPER_MAGIC 0x187
-/* Constants that identify the `bfs' filesystem. */
+/* Constant that identifies the `bfs' filesystem. */
#define BFS_MAGIC 0x1BADFACE
-/* Constants that identify the `coda' filesystem. */
+/* Constant that identifies the `coda' filesystem. */
#define CODA_SUPER_MAGIC 0x73757245
-/* Constants that identify the `coherent' filesystem. */
+/* Constant that identifies the `coherent' filesystem. */
#define COH_SUPER_MAGIC 0x012ff7b7
/* Constant that identifies the `ramfs' filesystem. */
@@ -52,7 +52,7 @@
/* Constant that identifies the `devpts' filesystem. */
#define DEVPTS_SUPER_MAGIC 0x1cd1
-/* Constant that identifies the `efs' filesystem. */
+/* Constants that identifies the `efs' filesystem. */
#define EFS_SUPER_MAGIC 0x414A53
#define EFS_MAGIC 0x072959
@@ -74,6 +74,9 @@
/* Constant that identifies the `jfs' filesystem. */
#define JFS_SUPER_MAGIC 0x3153464a
+/* Constant that identifies the `logfs´ filesystem. */
+#define LOGFS_MAGIC_U32 0xc97e8168u
+
/* Constants that identify the `minix2' filesystem. */
#define MINIX2_SUPER_MAGIC 0x2468
#define MINIX2_SUPER_MAGIC2 0x2478
@@ -82,62 +85,62 @@
#define MINIX_SUPER_MAGIC 0x137f
#define MINIX_SUPER_MAGIC2 0x138F
-/* Constants that identify the `msdos' filesystem. */
+/* Constant that identifies the `msdos' filesystem. */
#define MSDOS_SUPER_MAGIC 0x4d44
-/* Constants that identify the `ncp' filesystem. */
+/* Constant that identifies the `ncp' filesystem. */
#define NCP_SUPER_MAGIC 0x564c
-/* Constants that identify the `nfs' filesystem. */
+/* Constant that identifies the `nfs' filesystem. */
#define NFS_SUPER_MAGIC 0x6969
-/* Constants that identify the `ntfs' filesystem. */
+/* Constant that identifies the `ntfs' filesystem. */
#define NTFS_SUPER_MAGIC 0x5346544e
-/* Constants that identify the `proc' filesystem. */
+/* Constant that identifies the `proc' filesystem. */
#define PROC_SUPER_MAGIC 0x9fa0
/* Constant that identifies the `usbdevfs' filesystem. */
#define USBDEVFS_SUPER_MAGIC 0x9fa2
-/* Constants that identify the `qnx4' filesystem. */
+/* Constant that identifies the `qnx4' filesystem. */
#define QNX4_SUPER_MAGIC 0x002f
-/* Constants that identify the `reiser' filesystem. */
+/* Constant that identifies the `reiser' filesystem. */
#define REISERFS_SUPER_MAGIC 0x52654973
/* Constant that identifies the `romfs' filesystem. */
#define ROMFS_SUPER_MAGIC 0x7275
-/* Constants that identify the `smb' filesystem. */
+/* Constant that identifies the `shm' filesystem. */
+#define SHMFS_SUPER_MAGIC 0x01021994
+
+/* Constant that identifies the `smb' filesystem. */
#define SMB_SUPER_MAGIC 0x517b
+/* Constant that identifies the `sysfs´ filesystem. */
+#define SYSFS_MAGIC 0x62656572
+
/* Constants that identify the `sysV' filesystem. */
#define SYSV2_SUPER_MAGIC 0x012ff7b6
#define SYSV4_SUPER_MAGIC 0x012ff7b5
-/* Constants that identify the `udf' filesystem. */
+/* Constant that identifies the `udf' filesystem. */
#define UDF_SUPER_MAGIC 0x15013346
-/* Constants that identify the `ufs' filesystem. */
+/* Constant that identify the `ufs' filesystem. */
#define UFS_MAGIC 0x00011954
#define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */
-/* Constants that identify the `xenix' filesystem. */
-#define XENIX_SUPER_MAGIC 0x012ff7b4
+/* Constant that identifies the `vxfs' filesystem. */
+#define VXFS_SUPER_MAGIC 0xa501fcf5
-/* Constant that identifies the `shm' filesystem. */
-#define SHMFS_SUPER_MAGIC 0x01021994
+/* Constant that identifies the `xenix' filesystem. */
+#define XENIX_SUPER_MAGIC 0x012ff7b4
-/* Constants that identify the `xfs' filesystem. */
+/* Constant that identifies the `xfs' filesystem. */
#define XFS_SUPER_MAGIC 0x58465342
-/* Constants that identify the `vxfs' filesystem. */
-#define VXFS_SUPER_MAGIC 0xa501fcf5
-
-/* Constants that identify the `sysfs´ filesystem. */
-#define SYSFS_MAGIC 0x62656572
-
/* Maximum link counts. */
#define COH_LINK_MAX 10000
#define EXT2_LINK_MAX 32000
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S
index 4cb968505..cfd9864f6 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/____longjmp_chk.S
@@ -28,19 +28,12 @@
#define __longjmp ____longjmp_chk
#ifdef PIC
-# ifdef HAVE_ASM_PPC_REL16
# define LOAD_ARG \
bcl 20,31,1f; \
1: mflr r3; \
addis r3,r3,_GLOBAL_OFFSET_TABLE_-1b@ha; \
addi r3,r3,_GLOBAL_OFFSET_TABLE_-1b@l; \
lwz r3,.LC0@got(r3)
-# else
-# define LOAD_ARG \
- bl _GLOBAL_OFFSET_TABLE_-4@local; \
- mflr r3; \
- lwz r3,.LC0@got(r3)
-# endif
#else
# define LOAD_ARG \
lis r3,.LC0@ha; \
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S
index e94583494..4c8c6b433 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/brk.S
@@ -36,17 +36,10 @@ ENTRY (BP_SYM (__brk))
DO_CALL(SYS_ify(brk))
lwz r6,8(r1)
#ifdef PIC
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r5
addis r5,r5,__curbrk-1b@ha
stw r3,__curbrk-1b@l(r5)
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r5
- lwz r5,__curbrk@got(r5)
- stw r3,0(r5)
-# endif
#else
lis r4,__curbrk@ha
stw r3,__curbrk@l(r4)
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
index 953fe828a..fea41cd3a 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
@@ -145,15 +145,10 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef __CONTEXT_ENABLE_VRS
# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r7
-# endif
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies
index d379a2dd1..af946119a 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/power7/fpu/Implies
@@ -1,3 +1,4 @@
# Make sure this comes before the powerpc/powerpc32/fpu that's
# listed in unix/sysv/linux/powerpc/powerpc32/fpu/Implies.
+powerpc/powerpc32/power7/fpu
powerpc/powerpc32/power5/fpu
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
index b7e6ef9d4..4b8ee5e16 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
@@ -73,15 +73,10 @@ ENTRY(__CONTEXT_FUNC_NAME)
#ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r7
-# endif
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
index 441133762..71c0ce57a 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
@@ -146,15 +146,10 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef PIC
mflr r8
-# ifdef HAVE_ASM_PPC_REL16
bcl 20,31,1f
1: mflr r7
addis r7,r7,_GLOBAL_OFFSET_TABLE_-1b@ha
addi r7,r7,_GLOBAL_OFFSET_TABLE_-1b@l
-# else
- bl _GLOBAL_OFFSET_TABLE_@local-4
- mflr r7
-# endif
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies
index c46b3d42a..ca112208d 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/power7/fpu/Implies
@@ -1,3 +1,4 @@
# Make sure this comes before the powerpc/powerpc64/fpu that's
# listed in unix/sysv/linux/powerpc/powerpc64/fpu/Implies.
+powerpc/powerpc64/power7/fpu
powerpc/powerpc64/power5/fpu
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmp32.c b/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmp32.c
index 32a5d7137..d5793b390 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmp32.c
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmp32.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2008, 2010 Free Software Foundation, Inc.
Contributed by Andreas Krebbel <Andreas.Krebbel@de.ibm.com>.
This file is part of the GNU C Library.
@@ -31,7 +31,7 @@
calls. */
#define ALLOCATE_UTMP32_OUT(OUT) \
static struct utmp32 *OUT = NULL; \
- \
+ \
if (OUT == NULL) \
{ \
OUT = malloc (sizeof (struct utmp32)); \
@@ -62,7 +62,7 @@
struct utmp32 *
getutid32 (const struct utmp32 *id)
{
- ACCESS_UTMP_ENTRY (getutid, id)
+ ACCESS_UTMP_ENTRY (__getutid, id)
}
symbol_version (getutid32, getutid, GLIBC_2.0);
@@ -71,7 +71,7 @@ symbol_version (getutid32, getutid, GLIBC_2.0);
struct utmp32 *
getutline32 (const struct utmp32 *line)
{
- ACCESS_UTMP_ENTRY (getutline, line)
+ ACCESS_UTMP_ENTRY (__getutline, line)
}
symbol_version (getutline32, getutline, GLIBC_2.0);
@@ -79,7 +79,7 @@ symbol_version (getutline32, getutline, GLIBC_2.0);
struct utmp32 *
pututline32 (const struct utmp32 *utmp_ptr)
{
- ACCESS_UTMP_ENTRY (pututline, utmp_ptr)
+ ACCESS_UTMP_ENTRY (__pututline, utmp_ptr)
}
symbol_version (pututline32, pututline, GLIBC_2.0);
@@ -90,7 +90,7 @@ getutent32 (void)
struct utmp *out64;
ALLOCATE_UTMP32_OUT (out32);
- out64 = getutent ();
+ out64 = __getutent ();
if (!out64)
return NULL;
@@ -108,7 +108,7 @@ getutent32_r (struct utmp32 *buffer, struct utmp32 **result)
struct utmp *out64p;
int ret;
- ret = getutent_r (&out64, &out64p);
+ ret = __getutent_r (&out64, &out64p);
if (ret == -1)
{
*result = NULL;
@@ -133,7 +133,7 @@ getutid32_r (const struct utmp32 *id, struct utmp32 *buffer,
utmp_convert32to64 (id, &in64);
- ret = getutid_r (&in64, &out64, &out64p);
+ ret = __getutid_r (&in64, &out64, &out64p);
if (ret == -1)
{
*result = NULL;
@@ -158,7 +158,7 @@ getutline32_r (const struct utmp32 *line,
utmp_convert32to64 (line, &in64);
- ret = getutline_r (&in64, &out64, &out64p);
+ ret = __getutline_r (&in64, &out64, &out64p);
if (ret == -1)
{
*result = NULL;
@@ -180,6 +180,6 @@ updwtmp32 (const char *wtmp_file, const struct utmp32 *utmp)
struct utmp in32;
utmp_convert32to64 (utmp, &in32);
- updwtmp (wtmp_file, &in32);
+ __updwtmp (wtmp_file, &in32);
}
symbol_version (updwtmp32, updwtmp, GLIBC_2.0);
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.c b/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.c
index 69a1384db..790b46466 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.c
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2008, 2010 Free Software Foundation, Inc.
Contributed by Andreas Krebbel <Andreas.Krebbel@de.ibm.com>.
This file is part of the GNU C Library.
@@ -34,7 +34,7 @@
calls. */
#define ALLOCATE_UTMPX32_OUT(OUT) \
static struct utmpx32 *OUT = NULL; \
- \
+ \
if (OUT == NULL) \
{ \
OUT = malloc (sizeof (struct utmpx32)); \
@@ -68,7 +68,7 @@ getutxent32 (void)
struct utmpx *out64;
ALLOCATE_UTMPX32_OUT (out32);
- out64 = getutxent ();
+ out64 = __getutxent ();
if (!out64)
return NULL;
@@ -82,7 +82,7 @@ symbol_version (getutxent32, getutxent, GLIBC_2.1);
struct utmpx32 *
getutxid32 (const struct utmpx32 *id)
{
- ACCESS_UTMPX_ENTRY (getutxid, id);
+ ACCESS_UTMPX_ENTRY (__getutxid, id);
}
symbol_version (getutxid32, getutxid, GLIBC_2.1);
@@ -90,7 +90,7 @@ symbol_version (getutxid32, getutxid, GLIBC_2.1);
struct utmpx32 *
getutxline32 (const struct utmpx32 *line)
{
- ACCESS_UTMPX_ENTRY (getutxline, line);
+ ACCESS_UTMPX_ENTRY (__getutxline, line);
}
symbol_version (getutxline32, getutxline, GLIBC_2.1);
@@ -98,7 +98,7 @@ symbol_version (getutxline32, getutxline, GLIBC_2.1);
struct utmpx32 *
pututxline32 (const struct utmpx32 *utmpx)
{
- ACCESS_UTMPX_ENTRY (pututxline, utmpx);
+ ACCESS_UTMPX_ENTRY (__pututxline, utmpx);
}
symbol_version (pututxline32, pututxline, GLIBC_2.1);
@@ -109,7 +109,7 @@ updwtmpx32 (const char *wtmpx_file, const struct utmpx32 *utmpx)
struct utmpx in64;
utmpx_convert32to64 (utmpx, &in64);
- updwtmpx (wtmpx_file, &in64);
+ __updwtmpx (wtmpx_file, &in64);
}
symbol_version (updwtmpx32, updwtmpx, GLIBC_2.1);
@@ -121,7 +121,7 @@ getutmp32 (const struct utmpx32 *utmpx, struct utmp32 *utmp)
struct utmp out64;
utmpx_convert32to64 (utmpx, &in64);
- getutmp (&in64, &out64);
+ __getutmp (&in64, &out64);
utmp_convert64to32 (&out64, utmp);
}
symbol_version (getutmp32, getutmp, GLIBC_2.1.1);
@@ -134,7 +134,7 @@ getutmpx32 (const struct utmp32 *utmp, struct utmpx32 *utmpx)
struct utmpx out64;
utmp_convert32to64 (utmp, &in64);
- getutmpx (&in64, &out64);
+ __getutmpx (&in64, &out64);
utmpx_convert64to32 (&out64, utmpx);
}
symbol_version (getutmpx32, getutmpx, GLIBC_2.1.1);
diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.h b/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.h
index 5f468ed68..dedc6c6be 100644
--- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.h
+++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/utmpx32.h
@@ -1,5 +1,5 @@
/* The `struct utmp' type, describing entries in the utmp file. GNU version.
- Copyright (C) 1993, 1996, 1997, 1998, 1999, 2002, 2008
+ Copyright (C) 1993, 1996, 1997, 1998, 1999, 2002, 2008, 2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -49,4 +49,16 @@ struct utmpx32
char __unused[20]; /* Reserved for future use. */
};
+/* The internal interface needed by the compat wrapper functions. */
+extern struct utmpx *__getutxent (void);
+extern struct utmpx *__getutxid (__const struct utmpx *__id);
+extern struct utmpx *__getutxline (__const struct utmpx *__line);
+extern struct utmpx *__pututxline (__const struct utmpx *__utmpx);
+extern void __updwtmpx (__const char *__wtmpx_file,
+ __const struct utmpx *__utmpx);
+extern void __getutmp (__const struct utmpx *__utmpx,
+ struct utmp *__utmp);
+extern void __getutmpx (__const struct utmp *__utmp,
+ struct utmpx *__utmpx);
+
#endif /* utmpx32.h */
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/Versions b/libc/sysdeps/unix/sysv/linux/sparc/Versions
index be3d2b96c..8fb638bb3 100644
--- a/libc/sysdeps/unix/sysv/linux/sparc/Versions
+++ b/libc/sysdeps/unix/sysv/linux/sparc/Versions
@@ -19,6 +19,10 @@ libc {
#errlist-compat 134
_sys_errlist; sys_errlist; _sys_nerr; sys_nerr;
}
+ GLIBC_2.12 {
+ #errlist-compat 135
+ _sys_errlist; sys_errlist; _sys_nerr; sys_nerr;
+ }
}
librt {
GLIBC_2.3 {
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sparc32/makecontext.c b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/makecontext.c
index 9b48dade6..bcf63db10 100644
--- a/libc/sysdeps/unix/sysv/linux/sparc/sparc32/makecontext.c
+++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/makecontext.c
@@ -77,7 +77,7 @@ __makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...)
if (i < 6)
ucp->uc_mcontext.gregs[REG_O0 + i] = arg;
else
- sp[i + 23] = arg;
+ sp[i + 23 - 6] = arg;
}
va_end (ap);
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c b/libc/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c
new file mode 100644
index 000000000..117762c77
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c
@@ -0,0 +1,49 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <errno.h>
+#include <sys/msg.h>
+#include <ipc_priv.h>
+
+#include <sysdep-cancel.h>
+#include <sys/syscall.h>
+
+#include <bp-checks.h>
+
+ssize_t
+__libc_msgrcv (msqid, msgp, msgsz, msgtyp, msgflg)
+ int msqid;
+ void *msgp;
+ size_t msgsz;
+ long int msgtyp;
+ int msgflg;
+{
+ if (SINGLE_THREAD_P)
+ return INLINE_SYSCALL (ipc, 6, IPCOP_msgrcv, msqid, msgsz, msgflg,
+ CHECK_N (msgp, msgsz), msgtyp);
+
+ int oldtype = LIBC_CANCEL_ASYNC ();
+
+ ssize_t result = INLINE_SYSCALL (ipc, 6, IPCOP_msgrcv, msqid, msgsz, msgflg,
+ CHECK_N (msgp, msgsz), msgtyp);
+
+ LIBC_CANCEL_RESET (oldtype);
+
+ return result;
+}
+weak_alias (__libc_msgrcv, msgrcv)
diff --git a/libc/sysdeps/unix/sysv/linux/sys/mount.h b/libc/sysdeps/unix/sysv/linux/sys/mount.h
index a41220d14..923b4616c 100644
--- a/libc/sysdeps/unix/sysv/linux/sys/mount.h
+++ b/libc/sysdeps/unix/sysv/linux/sys/mount.h
@@ -123,8 +123,10 @@ enum
#define MNT_FORCE MNT_FORCE
MNT_DETACH = 2, /* Just detach from the tree. */
#define MNT_DETACH MNT_DETACH
- MNT_EXPIRE = 4 /* Mark for expiry. */
+ MNT_EXPIRE = 4, /* Mark for expiry. */
#define MNT_EXPIRE MNT_EXPIRE
+ UMOUNT_NOFOLLOW = 8 /* Don't follow symlink on umount. */
+#define UMOUNT_NOFOLLOW UMOUNT_NOFOLLOW
};
diff --git a/libc/sysdeps/x86_64/Implies b/libc/sysdeps/x86_64/Implies
index 2b8412b0b..2e0a323e1 100644
--- a/libc/sysdeps/x86_64/Implies
+++ b/libc/sysdeps/x86_64/Implies
@@ -1,4 +1,5 @@
wordsize-64
ieee754/ldbl-96
+ieee754/dbl-64/wordsize-64
ieee754/dbl-64
ieee754/flt-32
diff --git a/libc/sysdeps/x86_64/dl-machine.h b/libc/sysdeps/x86_64/dl-machine.h
index 61a0556d5..f615e9591 100644
--- a/libc/sysdeps/x86_64/dl-machine.h
+++ b/libc/sysdeps/x86_64/dl-machine.h
@@ -1,5 +1,5 @@
/* Machine-dependent ELF dynamic relocation inline functions. x86-64 version.
- Copyright (C) 2001-2005, 2006, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2001-2006, 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>.
@@ -419,7 +419,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
case R_X86_64_PC32:
value += reloc->r_addend - (Elf64_Addr) reloc_addr;
*(unsigned int *) reloc_addr = value;
- if (__builtin_expect (value != (unsigned int) value, 0))
+ if (__builtin_expect (value != (int) value, 0))
{
fmt = "\
%s: Symbol `%s' causes overflow in R_X86_64_PC32 relocation\n";
diff --git a/libc/sysdeps/x86_64/fpu/fegetenv.c b/libc/sysdeps/x86_64/fpu/fegetenv.c
index fa5a8dadc..2159a1fab 100644
--- a/libc/sysdeps/x86_64/fpu/fegetenv.c
+++ b/libc/sysdeps/x86_64/fpu/fegetenv.c
@@ -28,3 +28,4 @@ fegetenv (fenv_t *envp)
/* Success. */
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/ports/ChangeLog.arm b/ports/ChangeLog.arm
index 9f3ee5391..fbd173ea8 100644
--- a/ports/ChangeLog.arm
+++ b/ports/ChangeLog.arm
@@ -1,3 +1,8 @@
+2010-02-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/arm/eabi/fegetenv.c, sysdeps/arm/fpu/fegetenv.c: Add
+ hidden alias.
+
2010-01-10 Joseph Myers <joseph@codesourcery.com>
* sysdeps/unix/sysv/linux/arm/bits/fcntl.h: Define O_DIRECTORY,
diff --git a/ports/ChangeLog.hppa b/ports/ChangeLog.hppa
index 28a3c44e7..9fd9f89fb 100644
--- a/ports/ChangeLog.hppa
+++ b/ports/ChangeLog.hppa
@@ -1,3 +1,23 @@
+2010-02-17 Matt Turner <mattst88@gmail.com>
+
+ * sysdeps/unix/sysv/linux/hppa/bits/socket.h: Actually fix
+ SOCK_CLOEXEC to match O_CLOEXEC.
+
+2010-02-02 Carlos O'Donell <carlos@codesourcery.com>
+
+ * sysdeps/unix/sysv/linux/hppa/bits/mman.h: Define
+ MADV_MERGEABLE and MADV_UNMERGEABLE.
+
+2010-02-02 Carlos O'Donell <carlos@codesourcery.com>
+
+ * sysdeps/hppa/nptl/tls.h (__set_cr27): Clobber
+ link register r31.
+
+2010-02-02 Carlos O'Donell <carlos@codesourcery.com>
+
+ * sysdeps/unix/sysv/linux/hppa/makecontext.c (__makecontext):
+ Support more than 8 arguments.
+
2010-02-01 Kyle McMartin <kyle@redhat.com>
* sysdeps/unix/sysv/linux/hppa/bits/socket.h: Fix value of
diff --git a/ports/ChangeLog.m68k b/ports/ChangeLog.m68k
index cdc9242e4..c7d068e36 100644
--- a/ports/ChangeLog.m68k
+++ b/ports/ChangeLog.m68k
@@ -1,3 +1,78 @@
+2010-03-13 Andreas Schwab <schwab@linux-m68k.org>
+
+ * sysdeps/m68k/m680x0/fpu/s_ccosh.c: Use signbit macro.
+ * sysdeps/m68k/m680x0/fpu/s_ccos.c: Likewise.
+
+ * sysdeps/m68k/m680x0/fpu/bits/mathinline.h (__signbit)
+ (__signbitf, __signbitl): Always define as inline.
+
+ * sysdeps/unix/sysv/linux/m68k/sysdep.h (SYSCALL_ERROR_HANDLER):
+ Add variant for USE__THREAD.
+
+2010-03-12 Andreas Schwab <schwab@linux-m68k.org>
+
+ * sysdeps/unix/sysv/linux/m68k/nptl/lowlevellock.h: Don't include
+ <sysdep.h>.
+
+ * sysdeps/unix/sysv/linux/m68k/m68k-helpers.S: Renamed ...
+ * sysdeps/unix/sysv/linux/m68k/coldfire/m68k-helpers.S: ... to
+ this.
+ * sysdeps/unix/sysv/linux/m68k/m680x0/m68k-helpers.S: New file.
+
+2010-03-09 Maxim Kuvyrkov <maxim@codesourcery.com>
+
+ NPTL support for m68k/ColdFire
+ * sysdeps/unix/sysv/linux/m68k/sysdep.h (tls.h): Include.
+ (INTERNAL_SYSCALL): Convert to INTERNAL_SYSCALL_NCS.
+ (PTR_MANGLE, PTR_DEMANGLE): Define.
+ (NEED_STATIC_SYSINFO_DSO): Define.
+ * sysdeps/unix/sysv/linux/m68k/clone.S: Support RESET_PID.
+ * sysdeps/unix/sysv/linux/m68k/m68k-helpers.S: New.
+ * sysdeps/unix/sysv/linux/m68k/Makefile: Add new files to lists.
+ * sysdeps/unix/sysv/linux/m68k/vfork.S: Add SAVE_PID/RESTORE_PID.
+ * sysdeps/unix/sysv/linux/m68k/m68k-vdso.c: New.
+ * sysdeps/unix/sysv/linux/m68k/libc-m68k-vdso.c: New.
+ * sysdeps/unix/sysv/linux/m68k/bits/m68k-vdso.h: New.
+ * sysdeps/unix/sysv/linux/m68k/coldfire/nptl/bits/atomic.h: New.
+ * sysdeps/unix/sysv/linux/m68k/Versions: Add symbols for NPTL support.
+ * sysdeps/unix/sysv/linux/m68k/init-first.c: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/sysdep-cancel.h: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/bits/pthreadtypes.h: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/bits/semaphore.h: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/clone.S: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/fork.c: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/lowlevellock.h: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/vfork.S: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/pt-vfork.S: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/createthread.c: New.
+ * sysdeps/unix/sysv/linux/m68k/nptl/pthread_once.c: New.
+ * sysdeps/unix/sysv/linux/m68k/socket.S: Update cancelation code.
+ * sysdeps/m68k/dl-tls.h: New.
+ * sysdeps/m68k/libc-tls.c: New.
+ * sysdeps/m68k/tls-macros.h: New.
+ * sysdeps/m68k/dl-machine.h (RTLD_START): Terminate stack frame to
+ generate better backtraces.
+ (elf_machine_type_class, elf_machine_rela): Handle TLS relocations.
+ * sysdeps/m68k/dl-machine.h: Handle TLS relocations.
+ * sysdeps/m68k/nptl/tcb-offsets.sym: New.
+ * sysdeps/m68k/nptl/pthread_spin_trylock.c: New.
+ * sysdeps/m68k/nptl/tls.h: New.
+ * sysdeps/m68k/nptl/pthread_spin_lock.c: New.
+ * sysdeps/m68k/nptl/pthreaddef.h: New.
+ * sysdeps/m68k/nptl/Makefile: New.
+
+ * sysdeps/unix/sysv/linux/m68k/bits/sigcontext.h: Remove.
+ * sysdeps/unix/sysv/linux/m68k/register-dump.h: Update.
+
+ * sysdeps/unix/sysv/linux/m68k/bits/siginfo.h: Remove.
+
+ * sysdeps/m68k/jmpbuf-unwind.h (_JMPBUF_CFA_UNWINDS_ADJ)
+ (_jmpbuf_sp, _JMPBUF_UNWINDS_ADJ, __libc_unwind_longjmp): Define.
+
+2010-02-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/m68k/fpu/fegetenv.c: Add hidden alias.
+
2010-01-11 Joseph Myers <joseph@codesourcery.com>
* sysdeps/unix/sysv/linux/m68k/bits/stat.h: Fix double-inclusion
diff --git a/ports/ChangeLog.mips b/ports/ChangeLog.mips
index 6d47986f6..c2609b07e 100644
--- a/ports/ChangeLog.mips
+++ b/ports/ChangeLog.mips
@@ -1,3 +1,7 @@
+2010-02-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/mips/fpu/fegetenv.c: Add hidden alias.
+
2010-01-28 Joseph Myers <joseph@codesourcery.com>
* sysdeps/mips/dl-trampoline.c (_dl_runtime_resolve,
diff --git a/ports/ChangeLog.powerpc b/ports/ChangeLog.powerpc
index 136f97a2a..390c774d2 100644
--- a/ports/ChangeLog.powerpc
+++ b/ports/ChangeLog.powerpc
@@ -1,3 +1,7 @@
+2010-02-10 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/powerpc/nofpu/fegetenv.c: Add hidden alias.
+
2008-08-19 Joseph Myers <joseph@codesourcery.com>
* sysdeps/powerpc/nofpu/shlib-versions: New.
diff --git a/ports/sysdeps/arm/eabi/fegetenv.c b/ports/sysdeps/arm/eabi/fegetenv.c
index 35bfac843..049138194 100644
--- a/ports/sysdeps/arm/eabi/fegetenv.c
+++ b/ports/sysdeps/arm/eabi/fegetenv.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment.
- Copyright (C) 1997,98,99,2000,01,05 Free Software Foundation, Inc.
+ Copyright (C) 1997,98,99,2000,01,05,10 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -48,4 +48,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/ports/sysdeps/arm/fpu/fegetenv.c b/ports/sysdeps/arm/fpu/fegetenv.c
index 0b40f183e..e59d2ebc4 100644
--- a/ports/sysdeps/arm/fpu/fegetenv.c
+++ b/ports/sysdeps/arm/fpu/fegetenv.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment.
- Copyright (C) 1997,98,99,2000,01 Free Software Foundation, Inc.
+ Copyright (C) 1997,98,99,2000,01,10 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -37,4 +37,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/ports/sysdeps/hppa/nptl/tls.h b/ports/sysdeps/hppa/nptl/tls.h
index 2810d713f..389cbb949 100644
--- a/ports/sysdeps/hppa/nptl/tls.h
+++ b/ports/sysdeps/hppa/nptl/tls.h
@@ -1,5 +1,5 @@
/* Definition for thread-local data handling. NPTL/hppa version.
- Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -140,11 +140,13 @@ static inline struct pthread *__get_cr27(void)
return (struct pthread *) cr27;
}
+/* We write to cr27, clobber r26 as the input argument, and clobber
+ r31 as the link register. */
static inline void __set_cr27(struct pthread *cr27)
{
asm ( "ble 0xe0(%%sr2, %%r0)\n\t"
"copy %0, %%r26"
- : : "r" (cr27) : "r26" );
+ : : "r" (cr27) : "r26", "r31" );
}
/* Get and set the global scope generation counter in struct pthread. */
diff --git a/ports/sysdeps/m68k/dl-machine.h b/ports/sysdeps/m68k/dl-machine.h
index 08a439641..9bc35e769 100644
--- a/ports/sysdeps/m68k/dl-machine.h
+++ b/ports/sysdeps/m68k/dl-machine.h
@@ -1,5 +1,6 @@
/* Machine-dependent ELF dynamic relocation inline functions. m68k version.
- Copyright (C) 1996-2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 1996-2001, 2002, 2003, 2004, 2005, 2010
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -24,6 +25,7 @@
#include <sys/param.h>
#include <sysdep.h>
+#include <dl-tls.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
@@ -121,6 +123,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
.globl _start\n\
.type _start,@function\n\
_start:\n\
+ sub.l %fp, %fp\n\
move.l %sp, -(%sp)\n\
jbsr _dl_start\n\
addq.l #4, %sp\n\
@@ -159,12 +162,16 @@ _dl_start_user:\n\
.size _dl_start_user, . - _dl_start_user\n\
.previous");
-/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so
- PLT entries should not be allowed to define the value.
+/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or
+ TLS variable, so undefined references should not be allowed to
+ define the value.
ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one
of the main executable's symbols, as for a COPY reloc. */
#define elf_machine_type_class(type) \
- ((((type) == R_68K_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) \
+ ((((type) == R_68K_JMP_SLOT \
+ || (type) == R_68K_TLS_DTPMOD32 \
+ || (type) == R_68K_TLS_DTPREL32 \
+ || (type) == R_68K_TLS_TPREL32) * ELF_RTYPE_CLASS_PLT) \
| (((type) == R_68K_COPY) * ELF_RTYPE_CLASS_COPY))
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
@@ -262,6 +269,25 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
case R_68K_PC32:
*reloc_addr = value + reloc->r_addend - (Elf32_Addr) reloc_addr;
break;
+#if defined USE_TLS && !defined RTLD_BOOTSTRAP
+ case R_68K_TLS_DTPMOD32:
+ /* Get the information from the link map returned by the
+ resolv function. */
+ if (sym_map != NULL)
+ *reloc_addr = sym_map->l_tls_modid;
+ break;
+ case R_68K_TLS_DTPREL32:
+ if (sym != NULL)
+ *reloc_addr = TLS_DTPREL_VALUE (sym, reloc);
+ break;
+ case R_68K_TLS_TPREL32:
+ if (sym != NULL)
+ {
+ CHECK_STATIC_TLS (map, sym_map);
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ }
+ break;
+#endif /* defined USE_TLS && !defined RTLD_BOOTSTRAP */
case R_68K_NONE: /* Alright, Wilbur. */
break;
default:
diff --git a/ports/sysdeps/m68k/dl-tls.h b/ports/sysdeps/m68k/dl-tls.h
new file mode 100644
index 000000000..f2ce9982f
--- /dev/null
+++ b/ports/sysdeps/m68k/dl-tls.h
@@ -0,0 +1,48 @@
+/* Thread-local storage handling in the ELF dynamic linker. M68K version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+
+/* Type used for the representation of TLS information in the GOT. */
+typedef struct
+{
+ unsigned long int ti_module;
+ unsigned long int ti_offset;
+} tls_index;
+
+/* The thread pointer points 0x7000 past the first static TLS block. */
+#define TLS_TP_OFFSET 0x7000
+
+/* Dynamic thread vector pointers point 0x8000 past the start of each
+ TLS block. */
+#define TLS_DTV_OFFSET 0x8000
+
+/* Compute the value for a TPREL reloc. */
+#define TLS_TPREL_VALUE(sym_map, sym, reloc) \
+ ((sym_map)->l_tls_offset + (sym)->st_value + (reloc)->r_addend \
+ - TLS_TP_OFFSET)
+
+/* Compute the value for a DTPREL reloc. */
+#define TLS_DTPREL_VALUE(sym, reloc) \
+ ((sym)->st_value + (reloc)->r_addend - TLS_DTV_OFFSET)
+
+extern void *__tls_get_addr (tls_index *ti);
+
+#define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET)
+#define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET)
diff --git a/ports/sysdeps/m68k/fpu/fegetenv.c b/ports/sysdeps/m68k/fpu/fegetenv.c
index 6f23e8b74..30f3fa5a5 100644
--- a/ports/sysdeps/m68k/fpu/fegetenv.c
+++ b/ports/sysdeps/m68k/fpu/fegetenv.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment.
- Copyright (C) 1997,99,2000,01 Free Software Foundation, Inc.
+ Copyright (C) 1997,99,2000,01,10 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
@@ -41,4 +41,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/ports/sysdeps/m68k/jmpbuf-unwind.h b/ports/sysdeps/m68k/jmpbuf-unwind.h
index 3490c79bd..28b953796 100644
--- a/ports/sysdeps/m68k/jmpbuf-unwind.h
+++ b/ports/sysdeps/m68k/jmpbuf-unwind.h
@@ -1,5 +1,5 @@
/* Examine __jmp_buf for unwinding frames. m68k version.
- Copyright (C) 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,8 +18,29 @@
02111-1307 USA. */
#include <setjmp.h>
+#include <stdint.h>
+#include <unwind.h>
/* Test if longjmp to JMPBUF would unwind the frame
containing a local variable at ADDRESS. */
#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \
((void *) (address) < (void *) demangle ((jmpbuf)->__sp))
+
+#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \
+ _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj)
+
+static inline uintptr_t __attribute__ ((unused))
+_jmpbuf_sp (__jmp_buf regs)
+{
+ uintptr_t sp = regs[0].__sp;
+#ifdef PTR_DEMANGLE
+ PTR_DEMANGLE (sp);
+#endif
+ return sp;
+}
+
+#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \
+ ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj))
+
+/* We use the normal longjmp for unwinding. */
+#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val)
diff --git a/ports/sysdeps/m68k/libc-tls.c b/ports/sysdeps/m68k/libc-tls.c
new file mode 100644
index 000000000..e865fac16
--- /dev/null
+++ b/ports/sysdeps/m68k/libc-tls.c
@@ -0,0 +1,38 @@
+/* Thread-local storage handling in the ELF dynamic linker. m68k version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <csu/libc-tls.c>
+#include <dl-tls.h>
+
+#if USE_TLS
+
+/* On M68K, linker optimizations are not required, so __tls_get_addr
+ can be called even in statically linked binaries. In this case module
+ must be always 1 and PT_TLS segment exist in the binary, otherwise it
+ would not link. */
+
+void *
+__tls_get_addr (tls_index *ti)
+{
+ dtv_t *dtv = THREAD_DTV ();
+ return (char *) dtv[1].pointer.val + GET_ADDR_OFFSET;
+}
+
+#endif
diff --git a/ports/sysdeps/m68k/m680x0/fpu/bits/mathinline.h b/ports/sysdeps/m68k/m680x0/fpu/bits/mathinline.h
index 6b69f7a49..975ffd818 100644
--- a/ports/sysdeps/m68k/m680x0/fpu/bits/mathinline.h
+++ b/ports/sysdeps/m68k/m680x0/fpu/bits/mathinline.h
@@ -1,5 +1,5 @@
/* Definitions of inline math functions implemented by the m68881/2.
- Copyright (C) 1991,92,93,94,96,97,98,99,2000,2002, 2003, 2004, 2008
+ Copyright (C) 1991,92,93,94,96,97,98,99,2000,2002, 2003, 2004, 2008, 2010
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -85,6 +85,26 @@
: "=dm" (__result) : "f" (x), "f" (y)); \
__result != 0; })
# endif /* GCC 3.1 */
+
+/* Test for negative number. Used in the signbit() macro. */
+__MATH_INLINE int
+__NTH (__signbitf (float __x))
+{
+ __extension__ union { float __f; int __i; } __u = { __f: __x };
+ return __u.__i < 0;
+}
+__MATH_INLINE int
+__NTH (__signbit (double __x))
+{
+ __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
+ return __u.__i[0] < 0;
+}
+__MATH_INLINE int
+__NTH (__signbitl (long double __x))
+{
+ __extension__ union { long double __d; int __i[3]; } __u = { __d: __x };
+ return __u.__i[0] < 0;
+}
#endif
@@ -281,17 +301,7 @@ __inline_functions(long double,l)
#ifdef __USE_ISOC99
# define __inline_functions(float_type, s) \
-__m81_defun (int, __CONCAT(__signbit,s), (float_type __value)) \
-{ \
- /* There is no branch-condition for the sign bit, so we must extract \
- and examine the condition codes manually. */ \
- unsigned long int __fpsr; \
- __asm ("ftst%.x %1\n" \
- "fmove%.l %/fpsr, %0" : "=dm" (__fpsr) : "f" (__value)); \
- return (__fpsr >> 27) & 1; \
-} \
- \
- __m81_defun (float_type, __CONCAT(__scalbln,s), \
+__m81_defun (float_type, __CONCAT(__scalbln,s), \
(float_type __x, long int __n)) \
{ \
return __CONCAT(__scalbn,s) (__x, __n); \
diff --git a/ports/sysdeps/m68k/m680x0/fpu/s_ccos.c b/ports/sysdeps/m68k/m680x0/fpu/s_ccos.c
index d302d3d86..823915964 100644
--- a/ports/sysdeps/m68k/m680x0/fpu/s_ccos.c
+++ b/ports/sysdeps/m68k/m680x0/fpu/s_ccos.c
@@ -1,5 +1,5 @@
/* Complex cosine function. m68k fpu version
- Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>.
@@ -48,7 +48,7 @@ s(__ccos) (__complex__ float_type x)
: "f" (__real__ x));
__real__ retval = cos_rx * m81(__ieee754_cosh) (__imag__ x);
if (rx_cond & __M81_COND_ZERO)
- __imag__ retval = (m81(__signbit) (__imag__ x)
+ __imag__ retval = (signbit (__imag__ x)
? __real__ x : -__real__ x);
else
__imag__ retval = -sin_rx * m81(__ieee754_sinh) (__imag__ x);
diff --git a/ports/sysdeps/m68k/m680x0/fpu/s_ccosh.c b/ports/sysdeps/m68k/m680x0/fpu/s_ccosh.c
index 1698881b9..d272e98f2 100644
--- a/ports/sysdeps/m68k/m680x0/fpu/s_ccosh.c
+++ b/ports/sysdeps/m68k/m680x0/fpu/s_ccosh.c
@@ -1,5 +1,5 @@
/* Complex cosine hyperbole function. m68k fpu version
- Copyright (C) 1997, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>.
@@ -48,7 +48,7 @@ s(__ccosh) (__complex__ float_type x)
: "f" (__imag__ x));
__real__ retval = cos_ix * m81(__ieee754_cosh) (__real__ x);
if (ix_cond & __M81_COND_ZERO)
- __imag__ retval = (m81(__signbit) (__real__ x)
+ __imag__ retval = (signbit (__real__ x)
? -__imag__ x : __imag__ x);
else
__imag__ retval = sin_ix * m81(__ieee754_sinh) (__real__ x);
diff --git a/ports/sysdeps/m68k/nptl/Makefile b/ports/sysdeps/m68k/nptl/Makefile
new file mode 100644
index 000000000..f36fc8fa3
--- /dev/null
+++ b/ports/sysdeps/m68k/nptl/Makefile
@@ -0,0 +1,22 @@
+# Copyright (C) 2010 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+# Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307 USA.
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += tcb-offsets.sym
+endif
diff --git a/ports/sysdeps/m68k/nptl/pthread_spin_lock.c b/ports/sysdeps/m68k/nptl/pthread_spin_lock.c
new file mode 100644
index 000000000..1cc16c8b9
--- /dev/null
+++ b/ports/sysdeps/m68k/nptl/pthread_spin_lock.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <atomic.h>
+#include "pthreadP.h"
+
+int
+pthread_spin_lock (pthread_spinlock_t *lock)
+{
+ while (atomic_compare_and_exchange_val_acq(lock, 1, 0) != 0)
+ while (*lock != 0)
+ ;
+
+ return 0;
+}
diff --git a/ports/sysdeps/m68k/nptl/pthread_spin_trylock.c b/ports/sysdeps/m68k/nptl/pthread_spin_trylock.c
new file mode 100644
index 000000000..831bffb60
--- /dev/null
+++ b/ports/sysdeps/m68k/nptl/pthread_spin_trylock.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <errno.h>
+#include <atomic.h>
+#include "pthreadP.h"
+
+int
+pthread_spin_trylock (pthread_spinlock_t *lock)
+{
+ return atomic_compare_and_exchange_val_acq(lock, 1, 0) ? EBUSY : 0;
+}
diff --git a/ports/sysdeps/m68k/nptl/pthreaddef.h b/ports/sysdeps/m68k/nptl/pthreaddef.h
new file mode 100644
index 000000000..0a549070f
--- /dev/null
+++ b/ports/sysdeps/m68k/nptl/pthreaddef.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Default stack size. */
+#define ARCH_STACK_DEFAULT_SIZE (2 * 1024 * 1024)
+
+/* Required stack pointer alignment at beginning. */
+#define STACK_ALIGN 16
+
+/* Minimal stack size after allocating thread descriptor and guard size. */
+#define MINIMAL_REST_STACK 2048
+
+/* Alignment requirement for TCB. */
+#define TCB_ALIGNMENT 16
+
+
+/* Location of current stack frame. */
+#define CURRENT_STACK_FRAME __builtin_frame_address (0)
+
+
+/* XXX Until we have a better place keep the definitions here. */
+#define __exit_thread_inline(val) \
+ INLINE_SYSCALL (exit, 1, (val))
diff --git a/ports/sysdeps/m68k/nptl/tcb-offsets.sym b/ports/sysdeps/m68k/nptl/tcb-offsets.sym
new file mode 100644
index 000000000..b1bba6586
--- /dev/null
+++ b/ports/sysdeps/m68k/nptl/tcb-offsets.sym
@@ -0,0 +1,11 @@
+#include <sysdep.h>
+#include <tls.h>
+
+--
+
+-- Derive offsets relative to the thread register.
+#define thread_offsetof(mem) (long)(offsetof(struct pthread, mem) - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE)
+
+MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads)
+PID_OFFSET thread_offsetof (pid)
+TID_OFFSET thread_offsetof (tid)
diff --git a/ports/sysdeps/m68k/nptl/tls.h b/ports/sysdeps/m68k/nptl/tls.h
new file mode 100644
index 000000000..c29824cfb
--- /dev/null
+++ b/ports/sysdeps/m68k/nptl/tls.h
@@ -0,0 +1,171 @@
+/* Definition for thread-local data handling. NPTL/m68k version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _TLS_H
+#define _TLS_H 1
+
+#include <dl-sysdep.h>
+
+#ifndef __ASSEMBLER__
+# include <stdbool.h>
+# include <stddef.h>
+# include <stdint.h>
+
+/* Type for the dtv. */
+typedef union dtv
+{
+ size_t counter;
+ struct
+ {
+ void *val;
+ bool is_static;
+ } pointer;
+} dtv_t;
+
+#else /* __ASSEMBLER__ */
+# include <tcb-offsets.h>
+#endif /* __ASSEMBLER__ */
+
+/* Signal that TLS support is available. */
+#define USE_TLS 1
+
+#ifndef __ASSEMBLER__
+
+/* Get system call information. */
+# include <sysdep.h>
+
+/* The TP points to the start of the thread blocks. */
+# define TLS_DTV_AT_TP 1
+
+/* Get the thread descriptor definition. */
+# include <nptl/descr.h>
+
+typedef struct
+{
+ dtv_t *dtv;
+ void *private;
+} tcbhead_t;
+
+/* This is the size of the initial TCB. Because our TCB is before the thread
+ pointer, we don't need this. */
+# define TLS_INIT_TCB_SIZE 0
+
+/* Alignment requirements for the initial TCB. */
+# define TLS_INIT_TCB_ALIGN __alignof__ (struct pthread)
+
+/* This is the size of the TCB. Because our TCB is before the thread
+ pointer, we don't need this. */
+# define TLS_TCB_SIZE 0
+
+/* Alignment requirements for the TCB. */
+# define TLS_TCB_ALIGN __alignof__ (struct pthread)
+
+/* This is the size we need before TCB - actually, it includes the TCB. */
+# define TLS_PRE_TCB_SIZE \
+ (sizeof (struct pthread) \
+ + ((sizeof (tcbhead_t) + TLS_TCB_ALIGN - 1) & ~(TLS_TCB_ALIGN - 1)))
+
+/* The thread pointer (TP) points to the end of the
+ TCB + 0x7000, as for PowerPC and MIPS. This implies that TCB address is
+ TP - 0x7000. As we define TLS_DTV_AT_TP we can
+ assume that the pthread struct is allocated immediately ahead of the
+ TCB. This implies that the pthread_descr address is
+ TP - (TLS_PRE_TCB_SIZE + 0x7000). */
+# define TLS_TCB_OFFSET 0x7000
+
+/* Install the dtv pointer. The pointer passed is to the element with
+ index -1 which contain the length. */
+# define INSTALL_DTV(tcbp, dtvp) \
+ ((tcbhead_t *) (tcbp))[-1].dtv = dtvp + 1
+
+/* Install new dtv for current thread. */
+# define INSTALL_NEW_DTV(dtv) \
+ (THREAD_DTV () = (dtv))
+
+/* Return dtv of given thread descriptor. */
+# define GET_DTV(tcbp) \
+ (((tcbhead_t *) (tcbp))[-1].dtv)
+
+/* Code to initially initialize the thread pointer. This might need
+ special attention since 'errno' is not yet available and if the
+ operation can cause a failure 'errno' must not be touched. */
+# define TLS_INIT_TP(tcbp, secondcall) \
+ ({ \
+ INTERNAL_SYSCALL_DECL (err); \
+ int _sys_result; \
+ \
+ _sys_result = INTERNAL_SYSCALL (set_thread_area, err, 1, \
+ ((void *) (tcbp)) + TLS_TCB_OFFSET); \
+ INTERNAL_SYSCALL_ERROR_P (_sys_result, err) ? "unknown error" : NULL; })
+
+extern void * __m68k_read_tp (void);
+
+/* Return the address of the dtv for the current thread. */
+# define THREAD_DTV() \
+ (((tcbhead_t *) (__m68k_read_tp () - TLS_TCB_OFFSET))[-1].dtv)
+
+/* Return the thread descriptor for the current thread. */
+# define THREAD_SELF \
+ ((struct pthread *) (__m68k_read_tp () - TLS_TCB_OFFSET - TLS_PRE_TCB_SIZE))
+
+/* Magic for libthread_db to know how to do THREAD_SELF. */
+# define DB_THREAD_SELF \
+ CONST_THREAD_AREA (32, TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE)
+
+/* Access to data in the thread descriptor is easy. */
+# define THREAD_GETMEM(descr, member) \
+ descr->member
+# define THREAD_GETMEM_NC(descr, member, idx) \
+ descr->member[idx]
+# define THREAD_SETMEM(descr, member, value) \
+ descr->member = (value)
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+ descr->member[idx] = (value)
+
+/* l_tls_offset == 0 is perfectly valid on M68K, so we have to use some
+ different value to mean unset l_tls_offset. */
+# define NO_TLS_OFFSET -1
+
+/* Get and set the global scope generation counter in struct pthread. */
+#define THREAD_GSCOPE_FLAG_UNUSED 0
+#define THREAD_GSCOPE_FLAG_USED 1
+#define THREAD_GSCOPE_FLAG_WAIT 2
+#define THREAD_GSCOPE_RESET_FLAG() \
+ do \
+ { int __res \
+ = atomic_exchange_rel (&THREAD_SELF->header.gscope_flag, \
+ THREAD_GSCOPE_FLAG_UNUSED); \
+ if (__res == THREAD_GSCOPE_FLAG_WAIT) \
+ lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \
+ } \
+ while (0)
+#define THREAD_GSCOPE_SET_FLAG() \
+ do \
+ { \
+ THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \
+ atomic_write_barrier (); \
+ } \
+ while (0)
+#define THREAD_GSCOPE_WAIT() \
+ GL(dl_wait_lookup_done) ()
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* tls.h */
diff --git a/ports/sysdeps/m68k/tls-macros.h b/ports/sysdeps/m68k/tls-macros.h
new file mode 100644
index 000000000..d03d7b958
--- /dev/null
+++ b/ports/sysdeps/m68k/tls-macros.h
@@ -0,0 +1,70 @@
+/* Macros for accessing thread-local storage. m68k version.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define TLS_GD(x) \
+ ({ \
+ void *__result; \
+ extern void *__tls_get_addr (void *); \
+ \
+ asm ("movel #_GLOBAL_OFFSET_TABLE_@GOTPC, %0\n\t" \
+ "lea (-6, %%pc, %0), %0\n\t" \
+ "lea " #x "@TLSGD(%0), %0" \
+ : "=&a" (__result)); \
+ (int *) __tls_get_addr (__result); })
+
+#define TLS_LD(x) \
+ ({ \
+ char *__tp; \
+ int __offset; \
+ extern void *__tls_get_addr (void *); \
+ \
+ asm ("movel #_GLOBAL_OFFSET_TABLE_@GOTPC, %0\n\t" \
+ "lea (-6, %%pc, %0), %0\n\t" \
+ "lea " #x "@TLSLDM(%0), %0" \
+ : "=&a" (__tp)); \
+ __tp = (char *) __tls_get_addr (__tp); \
+ asm ("movel #" #x "@TLSLDO, %0" \
+ : "=a" (__offset)); \
+ (int *) (__tp + __offset); })
+
+#define TLS_IE(x) \
+ ({ \
+ char *__tp; \
+ int __offset; \
+ extern void * __m68k_read_tp (void); \
+ \
+ __tp = (char *) __m68k_read_tp (); \
+ asm ("movel #_GLOBAL_OFFSET_TABLE_@GOTPC, %0\n\t" \
+ "lea (-6, %%pc, %0), %0\n\t" \
+ "movel " #x "@TLSIE(%0), %0" \
+ : "=&a" (__offset)); \
+ (int *) (__tp + __offset); })
+
+#define TLS_LE(x) \
+ ({ \
+ char *__tp; \
+ int __offset; \
+ extern void * __m68k_read_tp (void); \
+ \
+ __tp = (char *) __m68k_read_tp (); \
+ asm ("movel #" #x "@TLSLE, %0" \
+ : "=a" (__offset)); \
+ (int *) (__tp + __offset); })
+
diff --git a/ports/sysdeps/mips/fpu/fegetenv.c b/ports/sysdeps/mips/fpu/fegetenv.c
index c1741385f..da5d59774 100644
--- a/ports/sysdeps/mips/fpu/fegetenv.c
+++ b/ports/sysdeps/mips/fpu/fegetenv.c
@@ -1,5 +1,5 @@
/* Store current floating-point environment.
- Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
+ Copyright (C) 1998, 1999, 2000, 2002, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 1998.
@@ -29,3 +29,4 @@ fegetenv (fenv_t *envp)
/* Success. */
return 0;
}
+libm_hidden_def (fegetenv)
diff --git a/ports/sysdeps/powerpc/nofpu/fegetenv.c b/ports/sysdeps/powerpc/nofpu/fegetenv.c
index 3cc8b13da..64c0e8c48 100644
--- a/ports/sysdeps/powerpc/nofpu/fegetenv.c
+++ b/ports/sysdeps/powerpc/nofpu/fegetenv.c
@@ -1,6 +1,6 @@
/* Store current floating-point environment (soft-float edition).
Copyright (C) 2002 Free Software Foundation, Inc.
- Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002, 2010.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -46,4 +46,5 @@ strong_alias (__fegetenv, __old_fegetenv)
compat_symbol (libm, BP_SYM (__old_fegetenv), BP_SYM (fegetenv), GLIBC_2_1);
#endif
+libm_hidden_ver (__fegetenv, fegetenv)
versioned_symbol (libm, BP_SYM (__fegetenv), BP_SYM (fegetenv), GLIBC_2_2);
diff --git a/ports/sysdeps/unix/sysv/linux/hppa/bits/mman.h b/ports/sysdeps/unix/sysv/linux/hppa/bits/mman.h
index f06532217..780862a38 100644
--- a/ports/sysdeps/unix/sysv/linux/hppa/bits/mman.h
+++ b/ports/sysdeps/unix/sysv/linux/hppa/bits/mman.h
@@ -86,6 +86,8 @@
# define MADV_REMOVE 9 /* Remove these pages and resources. */
# define MADV_DONTFORK 10 /* Do not inherit across fork. */
# define MADV_DOFORK 11 /* Do inherit across fork. */
+# define MADV_MERGEABLE 65 /* KSM may merge identical pages */
+# define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
#endif
/* The range 12-64 is reserved for page size specification. */
diff --git a/ports/sysdeps/unix/sysv/linux/hppa/bits/socket.h b/ports/sysdeps/unix/sysv/linux/hppa/bits/socket.h
index bcc27ccc6..819b39820 100644
--- a/ports/sysdeps/unix/sysv/linux/hppa/bits/socket.h
+++ b/ports/sysdeps/unix/sysv/linux/hppa/bits/socket.h
@@ -62,7 +62,7 @@ enum __socket_type
/* Flags to be ORed into the type parameter of socket and socketpair and
used for the flags parameter of paccept. */
- SOCK_CLOEXEC = 01000000, /* Atomically set close-on-exec flag for the
+ SOCK_CLOEXEC = 010000000, /* Atomically set close-on-exec flag for the
new descriptor(s). */
#define SOCK_CLOEXEC SOCK_CLOEXEC
#undef SOCK_NONBLOCK
diff --git a/ports/sysdeps/unix/sysv/linux/hppa/makecontext.c b/ports/sysdeps/unix/sysv/linux/hppa/makecontext.c
index 69a18135d..cb036d041 100644
--- a/ports/sysdeps/unix/sysv/linux/hppa/makecontext.c
+++ b/ports/sysdeps/unix/sysv/linux/hppa/makecontext.c
@@ -1,5 +1,5 @@
/* Create new context.
- Copyright (C) 2008 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Helge Deller <deller@gmx.de>, 2008.
@@ -25,24 +25,21 @@
#include <sysdep.h>
#include <ucontext.h>
-/* XXX: This implementation only handles integer arguments. */
+/* POSIX only supports integer arguments. */
+#define STACK_ALIGN 64
+#define FRAME_SIZE 8
void
__makecontext (ucontext_t *ucp, void (*func) (void), int argc, ...)
{
- unsigned int *sp;
+ unsigned long *sp;
va_list ap;
int i;
- if (argc > 8)
- {
- fprintf (stderr, _("\
-makecontext: does not know how to handle more than 8 arguments\n"));
- exit (-1);
- }
-
- /* Get stack pointer. */
- sp = (unsigned int *) ucp->uc_stack.ss_sp;
+ /* Get stack pointer (64-byte aligned). */
+ sp = (unsigned long *)((((unsigned long) ucp->uc_stack.ss_sp)
+ + FRAME_SIZE + argc + STACK_ALIGN)
+ & ~(STACK_ALIGN - 1));
/* Store address to jump to. */
ucp->uc_mcontext.sc_gr[2] = (unsigned long) func;
@@ -50,29 +47,27 @@ makecontext: does not know how to handle more than 8 arguments\n"));
va_start (ap, argc);
/* Handle arguments. */
for (i = 0; i < argc; ++i)
- switch (i)
- {
- case 0:
- case 1:
- case 2:
- case 3:
- ucp->uc_mcontext.sc_gr[26-i] = va_arg (ap, int);
- break;
- case 4:
- case 5:
- case 6:
- case 7:
- if (sizeof(unsigned long) == 4) {
- /* 32bit: put arg7-arg4 on stack. */
- sp[7-i] = va_arg (ap, int);
- } else {
- /* 64bit: r19-r22 are arg7-arg4. */
- ucp->uc_mcontext.sc_gr[22+4-i] = va_arg (ap, int);
+ {
+ if (i < 4)
+ {
+ ucp->uc_mcontext.sc_gr[26-i] = va_arg (ap, int);
+ continue;
}
- break;
- }
- va_end (ap);
+ if ((i < 8) && (sizeof(unsigned long) == 8))
+ {
+ /* 64bit: r19-r22 are arg7-arg4. */
+ ucp->uc_mcontext.sc_gr[22+4-i] = va_arg (ap, int);
+ continue;
+ }
+
+ /* All other arguments go on the stack. */
+ sp[-1 * (FRAME_SIZE + 1 + i)] = va_arg (ap, int);
+ }
+ va_end (ap);
+
+ /* Adjust the stack pointer to last used argument. */
+ ucp->uc_mcontext.sc_gr[30] = (unsigned long) sp;
}
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/Makefile b/ports/sysdeps/unix/sysv/linux/m68k/Makefile
index 6bb4f6b81..97d95847f 100644
--- a/ports/sysdeps/unix/sysv/linux/m68k/Makefile
+++ b/ports/sysdeps/unix/sysv/linux/m68k/Makefile
@@ -2,12 +2,18 @@
m68k-syntax-flag = -DMOTOROLA_SYNTAX
+ifeq ($(subdir),csu)
+sysdep_routines += m68k-helpers
+endif
+
ifeq ($(subdir),misc)
sysdep_routines += mremap
sysdep_headers += sys/reg.h
endif
ifeq ($(subdir),elf)
+sysdep_routines += dl-vdso libc-m68k-vdso
+sysdep-rtld-routines += m68k-vdso
sysdep-others += lddlibc4
install-bin += lddlibc4
endif
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/Versions b/ports/sysdeps/unix/sysv/linux/m68k/Versions
index 5650f7f77..8a941f837 100644
--- a/ports/sysdeps/unix/sysv/linux/m68k/Versions
+++ b/ports/sysdeps/unix/sysv/linux/m68k/Versions
@@ -32,4 +32,18 @@ libc {
GLIBC_2.11 {
fallocate64;
}
+ GLIBC_2.12 {
+ __m68k_read_tp;
+ }
+ GLIBC_PRIVATE {
+ __vdso_atomic_cmpxchg_32; __vdso_atomic_barrier;
+ }
+}
+
+ld {
+ GLIBC_PRIVATE {
+ __rtld___vdso_read_tp;
+ __rtld___vdso_atomic_cmpxchg_32;
+ __rtld___vdso_atomic_barrier;
+ }
}
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/bits/m68k-vdso.h b/ports/sysdeps/unix/sysv/linux/m68k/bits/m68k-vdso.h
new file mode 100644
index 000000000..c36106017
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/bits/m68k-vdso.h
@@ -0,0 +1,61 @@
+/* Resolve function pointers to VDSO functions.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+
+#ifndef _M68K_VDSO_H
+#define _M68K_VDSO_H
+
+#ifdef SHARED
+
+# ifdef IS_IN_rtld
+# define M68K_VDSO_SYMBOL(name) __rtld_##name
+# define STR_M68K_VDSO_SYMBOL(name) "__rtld_" #name
+# else
+# define M68K_VDSO_SYMBOL(name) name
+# define STR_M68K_VDSO_SYMBOL(name) #name
+# endif
+
+# ifndef __ASSEMBLER__
+
+/* We define __rtld_* copies for rtld.
+ We need them visible in libc to initialize. */
+# if defined IS_IN_rtld || !defined NOT_IN_libc
+extern void *__rtld___vdso_read_tp;
+extern void *__rtld___vdso_atomic_cmpxchg_32;
+extern void *__rtld___vdso_atomic_barrier;
+
+/* These stubs are meant to be invoked only from the assembly. */
+extern void __vdso_read_tp_stub (void);
+extern void __vdso_atomic_cmpxchg_32_stub (void);
+extern void __vdso_atomic_barrier_stub (void);
+# endif /* IS_IN_rtld || !NOT_IN_libc */
+
+/* RTLD should only use its own copies. */
+# ifndef IS_IN_rtld
+extern void *__vdso_read_tp;
+extern void *__vdso_atomic_cmpxchg_32;
+extern void *__vdso_atomic_barrier;
+# endif /* !IS_IN_rtld */
+
+# endif /* !__ASSEMBLER__ */
+
+#endif /* SHARED */
+
+#endif /* _M68K_VDSO_H */
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/bits/sigcontext.h b/ports/sysdeps/unix/sysv/linux/m68k/bits/sigcontext.h
deleted file mode 100644
index 8ad0c9656..000000000
--- a/ports/sysdeps/unix/sysv/linux/m68k/bits/sigcontext.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Copyright (C) 2006 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#if !defined _SIGNAL_H && !defined _SYS_UCONTEXT_H
-# error "Never use <bits/sigcontext.h> directly; include <signal.h> instead."
-#endif
-
-#ifndef _BITS_SIGCONTEXT_H
-#define _BITS_SIGCONTEXT_H 1
-
-struct sigcontext {
- unsigned long sc_mask;
- unsigned long sc_usp;
- unsigned long sc_d0;
- unsigned long sc_d1;
-#ifdef __mcoldfire__
- unsigned long sc_d2;
- unsigned long sc_d3;
- unsigned long sc_d4;
- unsigned long sc_d5;
- unsigned long sc_d6;
- unsigned long sc_d7;
-#endif
- unsigned long sc_a0;
- unsigned long sc_a1;
-#ifdef __mcoldfire__
- unsigned long sc_a2;
- unsigned long sc_a3;
- unsigned long sc_a4;
- unsigned long sc_a5;
- unsigned long sc_a6;
-#endif
- unsigned short sc_sr;
- unsigned long sc_pc;
- unsigned short sc_formatvec;
-#ifdef __mcoldfire__
- unsigned long sc_fpregs[8][2];
- unsigned long sc_fpcntl[3];
- unsigned char sc_fpstate[16];
-#else
- unsigned long sc_fpregs[2*3];
- unsigned long sc_fpcntl[3];
- unsigned char sc_fpstate[216];
-#endif
-};
-
-#endif
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/bits/siginfo.h b/ports/sysdeps/unix/sysv/linux/m68k/bits/siginfo.h
deleted file mode 100644
index 1ded1c2a4..000000000
--- a/ports/sysdeps/unix/sysv/linux/m68k/bits/siginfo.h
+++ /dev/null
@@ -1,316 +0,0 @@
-/* siginfo_t, sigevent and constants. m68k linux version.
- Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#if !defined _SIGNAL_H && !defined __need_siginfo_t \
- && !defined __need_sigevent_t
-# error "Never include this file directly. Use <signal.h> instead"
-#endif
-
-#include <bits/wordsize.h>
-
-#if (!defined __have_sigval_t \
- && (defined _SIGNAL_H || defined __need_siginfo_t \
- || defined __need_sigevent_t))
-# define __have_sigval_t 1
-
-/* Type for data associated with a signal. */
-typedef union sigval
- {
- int sival_int;
- void *sival_ptr;
- } sigval_t;
-#endif
-
-#if (!defined __have_siginfo_t \
- && (defined _SIGNAL_H || defined __need_siginfo_t))
-# define __have_siginfo_t 1
-
-# define __SI_MAX_SIZE 128
-# if __WORDSIZE == 64
-# define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 4)
-# else
-# define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 3)
-# endif
-
-typedef struct siginfo
- {
- int si_signo; /* Signal number. */
- int si_errno; /* If non-zero, an errno value associated with
- this signal, as defined in <errno.h>. */
- int si_code; /* Signal code. */
-
- union
- {
- int _pad[__SI_PAD_SIZE];
-
- /* kill(). */
- struct
- {
- __pid_t si_pid; /* Sending process ID. */
- unsigned short __pad; /* 16-bit version of si_uid. */
- __uid_t si_uid; /* Real user ID of sending process. */
- } _kill;
-
- /* POSIX.1b timers. */
- struct
- {
- int si_tid; /* Timer ID. */
- int si_overrun; /* Overrun count. */
- sigval_t si_sigval; /* Signal value. */
- } _timer;
-
- /* POSIX.1b signals. */
- struct
- {
- __pid_t si_pid; /* Sending process ID. */
- unsigned short __pad; /* 16-bit version of si_uid. */
- sigval_t si_sigval; /* Signal value. */
- __uid_t si_uid; /* Real user ID of sending process. */
- } _rt;
-
- /* SIGCHLD. */
- struct
- {
- __pid_t si_pid; /* Which child. */
- unsigned short __pad; /* 16-bit version of si_uid. */
- int si_status; /* Exit value or signal. */
- __clock_t si_utime;
- __clock_t si_stime;
- __uid_t si_uid; /* Real user ID of sending process. */
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */
- struct
- {
- void *si_addr; /* Faulting insn/memory ref. */
- } _sigfault;
-
- /* SIGPOLL. */
- struct
- {
- long int si_band; /* Band event for SIGPOLL. */
- int si_fd;
- } _sigpoll;
- } _sifields;
- } siginfo_t;
-
-
-/* X/Open requires some more fields with fixed names. */
-# define si_pid _sifields._kill.si_pid
-# define si_uid _sifields._kill.si_uid
-# define si_timerid _sifields._timer.si_tid
-# define si_overrun _sifields._timer.si_overrun
-# define si_status _sifields._sigchld.si_status
-# define si_utime _sifields._sigchld.si_utime
-# define si_stime _sifields._sigchld.si_stime
-# define si_value _sifields._rt.si_sigval
-# define si_int _sifields._rt.si_sigval.sival_int
-# define si_ptr _sifields._rt.si_sigval.sival_ptr
-# define si_addr _sifields._sigfault.si_addr
-# define si_band _sifields._sigpoll.si_band
-# define si_fd _sifields._sigpoll.si_fd
-
-
-/* Values for `si_code'. Positive values are reserved for kernel-generated
- signals. */
-enum
-{
- SI_ASYNCNL = -60, /* Sent by asynch name lookup completion. */
-# define SI_ASYNCNL SI_ASYNCNL
- SI_TKILL = -6, /* Sent by tkill. */
-# define SI_TKILL SI_TKILL
- SI_SIGIO, /* Sent by queued SIGIO. */
-# define SI_SIGIO SI_SIGIO
- SI_ASYNCIO, /* Sent by AIO completion. */
-# define SI_ASYNCIO SI_ASYNCIO
- SI_MESGQ, /* Sent by real time mesq state change. */
-# define SI_MESGQ SI_MESGQ
- SI_TIMER, /* Sent by timer expiration. */
-# define SI_TIMER SI_TIMER
- SI_QUEUE, /* Sent by sigqueue. */
-# define SI_QUEUE SI_QUEUE
- SI_USER, /* Sent by kill, sigsend, raise. */
-# define SI_USER SI_USER
- SI_KERNEL = 0x80 /* Send by kernel. */
-#define SI_KERNEL SI_KERNEL
-};
-
-
-/* `si_code' values for SIGILL signal. */
-enum
-{
- ILL_ILLOPC = 1, /* Illegal opcode. */
-# define ILL_ILLOPC ILL_ILLOPC
- ILL_ILLOPN, /* Illegal operand. */
-# define ILL_ILLOPN ILL_ILLOPN
- ILL_ILLADR, /* Illegal addressing mode. */
-# define ILL_ILLADR ILL_ILLADR
- ILL_ILLTRP, /* Illegal trap. */
-# define ILL_ILLTRP ILL_ILLTRP
- ILL_PRVOPC, /* Privileged opcode. */
-# define ILL_PRVOPC ILL_PRVOPC
- ILL_PRVREG, /* Privileged register. */
-# define ILL_PRVREG ILL_PRVREG
- ILL_COPROC, /* Coprocessor error. */
-# define ILL_COPROC ILL_COPROC
- ILL_BADSTK /* Internal stack error. */
-# define ILL_BADSTK ILL_BADSTK
-};
-
-/* `si_code' values for SIGFPE signal. */
-enum
-{
- FPE_INTDIV = 1, /* Integer divide by zero. */
-# define FPE_INTDIV FPE_INTDIV
- FPE_INTOVF, /* Integer overflow. */
-# define FPE_INTOVF FPE_INTOVF
- FPE_FLTDIV, /* Floating point divide by zero. */
-# define FPE_FLTDIV FPE_FLTDIV
- FPE_FLTOVF, /* Floating point overflow. */
-# define FPE_FLTOVF FPE_FLTOVF
- FPE_FLTUND, /* Floating point underflow. */
-# define FPE_FLTUND FPE_FLTUND
- FPE_FLTRES, /* Floating point inexact result. */
-# define FPE_FLTRES FPE_FLTRES
- FPE_FLTINV, /* Floating point invalid operation. */
-# define FPE_FLTINV FPE_FLTINV
- FPE_FLTSUB /* Subscript out of range. */
-# define FPE_FLTSUB FPE_FLTSUB
-};
-
-/* `si_code' values for SIGSEGV signal. */
-enum
-{
- SEGV_MAPERR = 1, /* Address not mapped to object. */
-# define SEGV_MAPERR SEGV_MAPERR
- SEGV_ACCERR /* Invalid permissions for mapped object. */
-# define SEGV_ACCERR SEGV_ACCERR
-};
-
-/* `si_code' values for SIGBUS signal. */
-enum
-{
- BUS_ADRALN = 1, /* Invalid address alignment. */
-# define BUS_ADRALN BUS_ADRALN
- BUS_ADRERR, /* Non-existant physical address. */
-# define BUS_ADRERR BUS_ADRERR
- BUS_OBJERR /* Object specific hardware error. */
-# define BUS_OBJERR BUS_OBJERR
-};
-
-/* `si_code' values for SIGTRAP signal. */
-enum
-{
- TRAP_BRKPT = 1, /* Process breakpoint. */
-# define TRAP_BRKPT TRAP_BRKPT
- TRAP_TRACE /* Process trace trap. */
-# define TRAP_TRACE TRAP_TRACE
-};
-
-/* `si_code' values for SIGCHLD signal. */
-enum
-{
- CLD_EXITED = 1, /* Child has exited. */
-# define CLD_EXITED CLD_EXITED
- CLD_KILLED, /* Child was killed. */
-# define CLD_KILLED CLD_KILLED
- CLD_DUMPED, /* Child terminated abnormally. */
-# define CLD_DUMPED CLD_DUMPED
- CLD_TRAPPED, /* Traced child has trapped. */
-# define CLD_TRAPPED CLD_TRAPPED
- CLD_STOPPED, /* Child has stopped. */
-# define CLD_STOPPED CLD_STOPPED
- CLD_CONTINUED /* Stopped child has continued. */
-# define CLD_CONTINUED CLD_CONTINUED
-};
-
-/* `si_code' values for SIGPOLL signal. */
-enum
-{
- POLL_IN = 1, /* Data input available. */
-# define POLL_IN POLL_IN
- POLL_OUT, /* Output buffers available. */
-# define POLL_OUT POLL_OUT
- POLL_MSG, /* Input message available. */
-# define POLL_MSG POLL_MSG
- POLL_ERR, /* I/O error. */
-# define POLL_ERR POLL_ERR
- POLL_PRI, /* High priority input available. */
-# define POLL_PRI POLL_PRI
- POLL_HUP /* Device disconnected. */
-# define POLL_HUP POLL_HUP
-};
-
-# undef __need_siginfo_t
-#endif /* !have siginfo_t && (have _SIGNAL_H || need siginfo_t). */
-
-
-#if (defined _SIGNAL_H || defined __need_sigevent_t) \
- && !defined __have_sigevent_t
-# define __have_sigevent_t 1
-
-/* Structure to transport application-defined values with signals. */
-# define __SIGEV_MAX_SIZE 64
-# if __WORDSIZE == 64
-# define __SIGEV_PAD_SIZE ((__SIGEV_MAX_SIZE / sizeof (int)) - 4)
-# else
-# define __SIGEV_PAD_SIZE ((__SIGEV_MAX_SIZE / sizeof (int)) - 3)
-# endif
-
-typedef struct sigevent
- {
- sigval_t sigev_value;
- int sigev_signo;
- int sigev_notify;
-
- union
- {
- int _pad[__SIGEV_PAD_SIZE];
-
- /* When SIGEV_SIGNAL and SIGEV_THREAD_ID set, LWP ID of the
- thread to receive the signal. */
- __pid_t _tid;
-
- struct
- {
- void (*_function) (sigval_t); /* Function to start. */
- void *_attribute; /* Really pthread_attr_t. */
- } _sigev_thread;
- } _sigev_un;
- } sigevent_t;
-
-/* POSIX names to access some of the members. */
-# define sigev_notify_function _sigev_un._sigev_thread._function
-# define sigev_notify_attributes _sigev_un._sigev_thread._attribute
-
-/* `sigev_notify' values. */
-enum
-{
- SIGEV_SIGNAL = 0, /* Notify via signal. */
-# define SIGEV_SIGNAL SIGEV_SIGNAL
- SIGEV_NONE, /* Other notification: meaningless. */
-# define SIGEV_NONE SIGEV_NONE
- SIGEV_THREAD, /* Deliver via thread creation. */
-# define SIGEV_THREAD SIGEV_THREAD
-
- SIGEV_THREAD_ID = 4 /* Send signal to specific thread. */
-#define SIGEV_THREAD_ID SIGEV_THREAD_ID
-};
-
-#endif /* have _SIGNAL_H. */
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/clone.S b/ports/sysdeps/unix/sysv/linux/m68k/clone.S
index a179f8e0f..401e2ef3a 100644
--- a/ports/sysdeps/unix/sysv/linux/m68k/clone.S
+++ b/ports/sysdeps/unix/sysv/linux/m68k/clone.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996,97,98,2002 Free Software Foundation, Inc.
+/* Copyright (C) 1996,97,98,2002,2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Schwab (schwab@issan.informatik.uni-dortmund.de)
@@ -17,14 +17,21 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-/* clone is even more special than fork as it mucks with stacks
+/* clone() is even more special than fork() as it mucks with stacks
and invokes a function in the right context after its all over. */
#include <sysdep.h>
#define _ERRNO_H 1
#include <bits/errno.h>
+#ifdef RESET_PID
+#include <tls.h>
+#endif
+
+#define CLONE_VM 0x00000100
+#define CLONE_THREAD 0x00010000
-/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */
+/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg,
+ void *parent_tidptr, void *tls, void *child_tidptr) */
.text
ENTRY (__clone)
@@ -42,7 +49,17 @@ ENTRY (__clone)
movel 16(%sp), -(%a1)
/* Do the system call */
- movel 12(%sp), %d1 /* get flags */
+ movel 12+0(%sp), %d1 /* get flags */
+ movel %d3, -(%a1) /* save %d3 and get parent_tidptr */
+ movel %d3, -(%sp)
+ movel 20+4(%sp), %d3
+ movel %d4, -(%a1) /* save %d4 and get child_tidptr */
+ movel %d4, -(%sp)
+ movel 28+8(%sp), %d4
+ movel %d5, -(%a1) /* save %d5 and get tls */
+ movel %d5, -(%sp)
+ movel 24+12(%sp), %d5
+ /* save %d2 and get stack pointer */
#ifdef __mcoldfire__
movel %d2, -(%a1)
movel %d2, -(%sp)
@@ -57,6 +74,9 @@ ENTRY (__clone)
#else
exg %d2, %a1 /* restore %d2 */
#endif
+ movel (%sp)+, %d5 /* restore %d5, %d4 and %d3 */
+ movel (%sp)+, %d4
+ movel (%sp)+, %d3
tstl %d0
jmi SYSCALL_ERROR_LABEL
@@ -65,11 +85,35 @@ ENTRY (__clone)
rts
thread_start:
+ cfi_startproc
+ cfi_undefined (pc) /* Mark end of stack */
subl %fp, %fp /* terminate the stack frame */
+#ifdef RESET_PID
+ /* Check and see if we need to reset the PID. */
+ movel %d1, %a1
+ andl #CLONE_THREAD, %d1
+ jne donepid
+ movel %a1, %d1
+ movel #-1, %d0
+ andl #CLONE_VM, %d1
+ jne gotpid
+ movel #SYS_ify (getpid), %d0
+ trap #0
+gotpid:
+ movel %a0, -(%sp)
+ movel %d0, -(%sp)
+ bsrl __m68k_read_tp@PLTPC
+ movel (%sp)+, %d0
+ movel %d0, PID_OFFSET(%a0)
+ movel %d0, TID_OFFSET(%a0)
+ movel (%sp)+, %a0
+donepid:
+#endif
jsr (%a0)
movel %d0, %d1
movel #SYS_ify (exit), %d0
trap #0
+ cfi_endproc
PSEUDO_END (__clone)
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/coldfire/m68k-helpers.S b/ports/sysdeps/unix/sysv/linux/m68k/coldfire/m68k-helpers.S
new file mode 100644
index 000000000..242d23d39
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/coldfire/m68k-helpers.S
@@ -0,0 +1,105 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file with other
+ programs, and to distribute those programs without any restriction
+ coming from the use of this file. (The GNU Lesser General Public
+ License restrictions do apply in other respects; for example, they
+ cover modification of the file, and distribution when not linked
+ into another program.)
+
+ Note that people who make modified versions of this file are not
+ obligated to grant this special exception for their modified
+ versions; it is their choice whether to do so. The GNU Lesser
+ General Public License gives permission to release a modified
+ version without this exception; this exception also makes it
+ possible to release a modified version which carries forward this
+ exception.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <bits/m68k-vdso.h>
+
+ .text
+
+ .hidden __vdso_read_tp_stub
+ENTRY (__vdso_read_tp_stub)
+ cfi_startproc
+ move.l #__NR_get_thread_area, %d0
+ trap #0
+ move.l %d0, %a0
+ rts
+ cfi_endproc
+END (__vdso_read_tp_stub)
+
+# ifdef SHARED
+/* GCC will emit calls to this routine. Linux has an
+ equivalent helper function (which clobbers fewer registers than
+ a normal function call) in a vdso; tail call to the
+ helper. */
+# ifdef IS_IN_rtld
+/* rtld gets a hidden copy of __m68k_read_tp. */
+ .hidden __m68k_read_tp
+# endif
+ENTRY (__m68k_read_tp)
+ cfi_startproc
+ move.l #_GLOBAL_OFFSET_TABLE_@GOTPC, %a0
+ lea (-6, %pc, %a0), %a0
+ move.l M68K_VDSO_SYMBOL (__vdso_read_tp)@GOT(%a0), %a0
+ move.l (%a0), %a0
+ jmp (%a0)
+ cfi_endproc
+END (__m68k_read_tp)
+
+/* The following two stubs are for macros in atomic.h, they can't
+ clobber anything. */
+
+ .hidden __vdso_atomic_cmpxchg_32_stub
+ENTRY (__vdso_atomic_cmpxchg_32_stub)
+ cfi_startproc
+ move.l %d2, -(%sp)
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (%d2, 0)
+ move.l %d0, %d2
+ move.l #SYS_ify (atomic_cmpxchg_32), %d0
+ trap #0
+ move.l (%sp)+, %d2
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (%d2)
+ rts
+ cfi_endproc
+END (__vdso_atomic_cmpxchg_32_stub)
+
+ .hidden __vdso_atomic_barrier_stub
+ENTRY (__vdso_atomic_barrier_stub)
+ cfi_startproc
+ move.l %d0, -(%sp)
+ cfi_adjust_cfa_offset (4)
+ move.l #SYS_ify (atomic_barrier), %d0
+ trap #0
+ move.l (%sp)+, %d0
+ cfi_adjust_cfa_offset (-4)
+ rts
+ cfi_endproc
+END (__vdso_atomic_barrier_stub)
+# else /* !SHARED */
+/* If the vDSO is not available, use a syscall to get TP. */
+ strong_alias (__vdso_read_tp_stub, __m68k_read_tp)
+# endif /* SHARED */
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/coldfire/nptl/bits/atomic.h b/ports/sysdeps/unix/sysv/linux/m68k/coldfire/nptl/bits/atomic.h
new file mode 100644
index 000000000..1198bb9c4
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/coldfire/nptl/bits/atomic.h
@@ -0,0 +1,105 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _BITS_ATOMIC_H
+#define _BITS_ATOMIC_H 1
+
+#include <stdint.h>
+#include <sysdep.h>
+#include <bits/m68k-vdso.h>
+
+/* Coldfire has no atomic compare-and-exchange operation, but the
+ kernel provides userspace atomicity operations. Use them. */
+
+typedef int32_t atomic32_t;
+typedef uint32_t uatomic32_t;
+typedef int_fast32_t atomic_fast32_t;
+typedef uint_fast32_t uatomic_fast32_t;
+
+typedef intptr_t atomicptr_t;
+typedef uintptr_t uatomicptr_t;
+typedef intmax_t atomic_max_t;
+typedef uintmax_t uatomic_max_t;
+
+/* The only basic operation needed is compare and exchange. */
+/* For ColdFire we'll have to trap into the kernel mode anyway,
+ so trap from the library rather then from the kernel wrapper. */
+#ifdef SHARED
+# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
+ ({ \
+ /* Use temporary variables to workaround call-clobberness of */ \
+ /* the registers. */ \
+ __typeof (mem) _mem = mem; \
+ __typeof (oldval) _oldval = oldval; \
+ __typeof (newval) _newval = newval; \
+ register __typeof (mem) _a0 asm ("a0") = _mem; \
+ register __typeof (oldval) _d0 asm ("d0") = _oldval; \
+ register __typeof (newval) _d1 asm ("d1") = _newval; \
+ void *tmp; \
+ \
+ asm ("movel #_GLOBAL_OFFSET_TABLE_@GOTPC, %2\n\t" \
+ "lea (-6, %%pc, %2), %2\n\t" \
+ "movel " STR_M68K_VDSO_SYMBOL (__vdso_atomic_cmpxchg_32) \
+ "@GOT(%2), %2\n\t" \
+ "movel (%2), %2\n\t" \
+ "jsr (%2)\n\t" \
+ : "+d" (_d0), "+m" (*_a0), "=&a" (tmp) \
+ : "a" (_a0), "d" (_d1)); \
+ _d0; \
+ })
+#else
+# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
+ ({ \
+ /* Use temporary variables to workaround call-clobberness of */ \
+ /* the registers. */ \
+ __typeof (mem) _mem = mem; \
+ __typeof (oldval) _oldval = oldval; \
+ __typeof (newval) _newval = newval; \
+ register __typeof (oldval) _d0 asm ("d0") \
+ = SYS_ify (atomic_cmpxchg_32); \
+ register __typeof (mem) _a0 asm ("a0") = _mem; \
+ register __typeof (oldval) _d2 asm ("d2") = _oldval; \
+ register __typeof (newval) _d1 asm ("d1") = _newval; \
+ \
+ asm ("trap #0" \
+ : "+d" (_d0), "+m" (*_a0) \
+ : "a" (_a0), "d" (_d2), "d" (_d1)); \
+ _d0; \
+ })
+#endif
+
+#ifdef SHARED
+# define atomic_full_barrier() \
+ ({ \
+ void *tmp; \
+ \
+ asm ("movel #_GLOBAL_OFFSET_TABLE_@GOTPC, %0\n\t" \
+ "lea (-6, %pc, %0), %0\n\t" \
+ "movel " STR_M68K_VDSO_SYMBOL (__vdso_atomic_barrier) \
+ "@GOT(%0), %0\n\t" \
+ "movel (%0), %0\n\t" \
+ "jsr (%0)\n\t" \
+ : "=&a" (tmp)); \
+ })
+#else
+# define atomic_full_barrier() \
+ (INTERNAL_SYSCALL (atomic_barrier, , 0), (void) 0)
+#endif
+
+#endif
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/init-first.c b/ports/sysdeps/unix/sysv/linux/m68k/init-first.c
new file mode 100644
index 000000000..f8168d129
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/init-first.c
@@ -0,0 +1,74 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Note: linking in vDSO to a static binary requires changes to
+ the main GLIBC proper. Not yet implemented. */
+#ifdef SHARED
+
+#include <dl-vdso.h>
+#include <bits/m68k-vdso.h>
+
+static inline void
+_libc_vdso_platform_setup (void)
+{
+ void *p;
+
+ PREPARE_VERSION (linux26, "LINUX_2.6", 61765110);
+
+ /* It may happen that rtld didn't initialize the vDSO, so fallback
+ to the syscall implementations if _dl_vdso_vsym returns NULL.
+ This may happen when a static executable dlopen's a dynamic library.
+ This really is nothing more than a workaround for rtld/csu
+ deficiency. Ideally, init code would setup the vDSO for static
+ binaries too. */
+
+ p = _dl_vdso_vsym ("__kernel_read_tp", &linux26);
+ if (p != NULL)
+ {
+ __vdso_read_tp = p;
+ __rtld___vdso_read_tp = p;
+ }
+ else
+ assert (__vdso_read_tp == (void *) __vdso_read_tp_stub);
+
+ p = _dl_vdso_vsym ("__kernel_atomic_cmpxchg_32", &linux26);
+ if (p != NULL)
+ {
+ __vdso_atomic_cmpxchg_32 = p;
+ __rtld___vdso_atomic_cmpxchg_32 = p;
+ }
+ else
+ assert (__vdso_atomic_cmpxchg_32
+ == (void *) __vdso_atomic_cmpxchg_32_stub);
+
+ p = _dl_vdso_vsym ("__kernel_atomic_barrier", &linux26);
+ if (p != NULL)
+ {
+ __vdso_atomic_barrier = p;
+ __rtld___vdso_atomic_barrier = p;
+ }
+ else
+ assert (__vdso_atomic_barrier == (void *) __vdso_atomic_barrier_stub);
+}
+
+#define VDSO_SETUP _libc_vdso_platform_setup
+
+#endif /* SHARED */
+
+#include <sysdeps/unix/sysv/linux/init-first.c>
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/libc-m68k-vdso.c b/ports/sysdeps/unix/sysv/linux/m68k/libc-m68k-vdso.c
new file mode 100644
index 000000000..45982e99b
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/libc-m68k-vdso.c
@@ -0,0 +1 @@
+#include "m68k-vdso.c"
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/m680x0/m68k-helpers.S b/ports/sysdeps/unix/sysv/linux/m68k/m680x0/m68k-helpers.S
new file mode 100644
index 000000000..83ce56423
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/m680x0/m68k-helpers.S
@@ -0,0 +1,103 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file with other
+ programs, and to distribute those programs without any restriction
+ coming from the use of this file. (The GNU Lesser General Public
+ License restrictions do apply in other respects; for example, they
+ cover modification of the file, and distribution when not linked
+ into another program.)
+
+ Note that people who make modified versions of this file are not
+ obligated to grant this special exception for their modified
+ versions; it is their choice whether to do so. The GNU Lesser
+ General Public License gives permission to release a modified
+ version without this exception; this exception also makes it
+ possible to release a modified version which carries forward this
+ exception.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <bits/m68k-vdso.h>
+
+ .text
+
+ .hidden __vdso_read_tp_stub
+ENTRY (__vdso_read_tp_stub)
+ cfi_startproc
+ move.l #__NR_get_thread_area, %d0
+ trap #0
+ move.l %d0, %a0
+ rts
+ cfi_endproc
+END (__vdso_read_tp_stub)
+
+# ifdef SHARED
+/* GCC will emit calls to this routine. Linux has an
+ equivalent helper function (which clobbers fewer registers than
+ a normal function call) in a vdso; tail call to the
+ helper. */
+# ifdef IS_IN_rtld
+/* rtld gets a hidden copy of __m68k_read_tp. */
+ .hidden __m68k_read_tp
+# endif
+ENTRY (__m68k_read_tp)
+ cfi_startproc
+ lea _GLOBAL_OFFSET_TABLE_@GOTPC(%pc), %a0
+ move.l M68K_VDSO_SYMBOL (__vdso_read_tp)@GOT(%a0), %a0
+ jmp ([%a0])
+ cfi_endproc
+END (__m68k_read_tp)
+
+/* The following two stubs are for macros in atomic.h, they can't
+ clobber anything. */
+
+ .hidden __vdso_atomic_cmpxchg_32_stub
+ENTRY (__vdso_atomic_cmpxchg_32_stub)
+ cfi_startproc
+ move.l %d2, -(%sp)
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (%d2, 0)
+ move.l %d0, %d2
+ move.l #SYS_ify (atomic_cmpxchg_32), %d0
+ trap #0
+ move.l (%sp)+, %d2
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (%d2)
+ rts
+ cfi_endproc
+END (__vdso_atomic_cmpxchg_32_stub)
+
+ .hidden __vdso_atomic_barrier_stub
+ENTRY (__vdso_atomic_barrier_stub)
+ cfi_startproc
+ move.l %d0, -(%sp)
+ cfi_adjust_cfa_offset (4)
+ move.l #SYS_ify (atomic_barrier), %d0
+ trap #0
+ move.l (%sp)+, %d0
+ cfi_adjust_cfa_offset (-4)
+ rts
+ cfi_endproc
+END (__vdso_atomic_barrier_stub)
+# else /* !SHARED */
+/* If the vDSO is not available, use a syscall to get TP. */
+ strong_alias (__vdso_read_tp_stub, __m68k_read_tp)
+# endif /* SHARED */
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/m68k-vdso.c b/ports/sysdeps/unix/sysv/linux/m68k/m68k-vdso.c
new file mode 100644
index 000000000..73b25709b
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/m68k-vdso.c
@@ -0,0 +1,35 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef SHARED
+
+#include <bits/m68k-vdso.h>
+
+/* Because these pointers are used from other libraries than libc,
+ they are exported at GLIBC_PRIVATE version.
+ We initialize them to syscall implementation so that they will be ready
+ to use from the very beginning. */
+void * M68K_VDSO_SYMBOL (__vdso_read_tp)
+= (void *) __vdso_read_tp_stub;
+void * M68K_VDSO_SYMBOL (__vdso_atomic_cmpxchg_32)
+= (void *) __vdso_atomic_cmpxchg_32_stub;
+void * M68K_VDSO_SYMBOL (__vdso_atomic_barrier)
+= (void *) __vdso_atomic_barrier_stub;
+
+#endif /* SHARED */
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/pthreadtypes.h b/ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/pthreadtypes.h
new file mode 100644
index 000000000..01408101c
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/pthreadtypes.h
@@ -0,0 +1,172 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _BITS_PTHREADTYPES_H
+#define _BITS_PTHREADTYPES_H 1
+
+#include <endian.h>
+
+#define __SIZEOF_PTHREAD_ATTR_T 36
+#define __SIZEOF_PTHREAD_MUTEX_T 24
+#define __SIZEOF_PTHREAD_MUTEXATTR_T 4
+#define __SIZEOF_PTHREAD_COND_T 48
+#define __SIZEOF_PTHREAD_CONDATTR_T 4
+#define __SIZEOF_PTHREAD_RWLOCK_T 32
+#define __SIZEOF_PTHREAD_RWLOCKATTR_T 8
+#define __SIZEOF_PTHREAD_BARRIER_T 20
+#define __SIZEOF_PTHREAD_BARRIERATTR_T 4
+
+
+/* Thread identifiers. The structure of the attribute type is
+ deliberately not exposed. */
+typedef unsigned long int pthread_t;
+
+
+typedef union
+{
+ char __size[__SIZEOF_PTHREAD_ATTR_T];
+ long int __align;
+} pthread_attr_t;
+
+
+typedef struct __pthread_internal_slist
+{
+ struct __pthread_internal_slist *__next;
+} __pthread_slist_t;
+
+
+/* Data structures for mutex handling. The structure of the attribute
+ type is deliberately not exposed. */
+typedef union
+{
+ struct __pthread_mutex_s
+ {
+ int __lock;
+ unsigned int __count;
+ int __owner;
+ /* KIND must stay at this position in the structure to maintain
+ binary compatibility. */
+ int __kind;
+ unsigned int __nusers;
+ __extension__ union
+ {
+ int __spins;
+ __pthread_slist_t __list;
+ };
+ } __data;
+ char __size[__SIZEOF_PTHREAD_MUTEX_T];
+ long int __align;
+} pthread_mutex_t;
+
+typedef union
+{
+ char __size[__SIZEOF_PTHREAD_MUTEXATTR_T];
+ int __align;
+} pthread_mutexattr_t;
+
+
+/* Data structure for conditional variable handling. The structure of
+ the attribute type is deliberately not exposed. */
+typedef union
+{
+ struct
+ {
+ int __lock;
+ unsigned int __futex;
+ __extension__ unsigned long long int __total_seq;
+ __extension__ unsigned long long int __wakeup_seq;
+ __extension__ unsigned long long int __woken_seq;
+ void *__mutex;
+ unsigned int __nwaiters;
+ unsigned int __broadcast_seq;
+ } __data;
+ char __size[__SIZEOF_PTHREAD_COND_T];
+ __extension__ long long int __align;
+} pthread_cond_t;
+
+typedef union
+{
+ char __size[__SIZEOF_PTHREAD_CONDATTR_T];
+ int __align;
+} pthread_condattr_t;
+
+
+/* Keys for thread-specific data */
+typedef unsigned int pthread_key_t;
+
+
+/* Once-only execution */
+typedef int pthread_once_t;
+
+
+#if defined __USE_UNIX98 || defined __USE_XOPEN2K
+/* Data structure for read-write lock variable handling. The
+ structure of the attribute type is deliberately not exposed. */
+typedef union
+{
+ struct
+ {
+ int __lock;
+ unsigned int __nr_readers;
+ unsigned int __readers_wakeup;
+ unsigned int __writer_wakeup;
+ unsigned int __nr_readers_queued;
+ unsigned int __nr_writers_queued;
+ unsigned char __pad1;
+ unsigned char __pad2;
+ unsigned char __shared;
+ /* FLAGS must stay at this position in the structure to maintain
+ binary compatibility. */
+ unsigned char __flags;
+ int __writer;
+ } __data;
+ char __size[__SIZEOF_PTHREAD_RWLOCK_T];
+ long int __align;
+} pthread_rwlock_t;
+
+typedef union
+{
+ char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];
+ long int __align;
+} pthread_rwlockattr_t;
+#endif
+
+
+#ifdef __USE_XOPEN2K
+/* POSIX spinlock data type. */
+typedef volatile int pthread_spinlock_t;
+
+
+/* POSIX barriers data type. The structure of the type is
+ deliberately not exposed. */
+typedef union
+{
+ char __size[__SIZEOF_PTHREAD_BARRIER_T];
+ long int __align;
+} pthread_barrier_t;
+
+typedef union
+{
+ char __size[__SIZEOF_PTHREAD_BARRIERATTR_T];
+ int __align;
+} pthread_barrierattr_t;
+#endif
+
+
+#endif /* bits/pthreadtypes.h */
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/semaphore.h b/ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/semaphore.h
new file mode 100644
index 000000000..2950cc9a7
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/bits/semaphore.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _SEMAPHORE_H
+# error "Never use <bits/semaphore.h> directly; include <semaphore.h> instead."
+#endif
+
+
+#define __SIZEOF_SEM_T 16
+
+
+/* Value returned if `sem_open' failed. */
+#define SEM_FAILED ((sem_t *) 0)
+
+
+typedef union
+{
+ char __size[__SIZEOF_SEM_T];
+ long int __align;
+} sem_t;
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/clone.S b/ports/sysdeps/unix/sysv/linux/m68k/nptl/clone.S
new file mode 100644
index 000000000..e7388fee7
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/clone.S
@@ -0,0 +1,2 @@
+#define RESET_PID
+#include "../clone.S"
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/createthread.c b/ports/sysdeps/unix/sysv/linux/m68k/nptl/createthread.c
new file mode 100644
index 000000000..be1144435
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/createthread.c
@@ -0,0 +1,25 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Value passed to 'clone' for initialization of the thread register. */
+#define TLS_VALUE ((void *) (pd) \
+ + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE)
+
+/* Get the real implementation. */
+#include <nptl/sysdeps/pthread/createthread.c>
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/fork.c b/ports/sysdeps/unix/sysv/linux/m68k/nptl/fork.c
new file mode 100644
index 000000000..dc1584e2d
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/fork.c
@@ -0,0 +1,30 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sched.h>
+#include <signal.h>
+#include <sysdep.h>
+#include <tls.h>
+
+#define ARCH_FORK() \
+ INLINE_SYSCALL (clone, 5, \
+ CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, 0, \
+ NULL, &THREAD_SELF->tid, NULL)
+
+#include <sysdeps/unix/sysv/linux/fork.c>
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/lowlevellock.h b/ports/sysdeps/unix/sysv/linux/m68k/nptl/lowlevellock.h
new file mode 100644
index 000000000..5ca78ec72
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/lowlevellock.h
@@ -0,0 +1,280 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Borrowed from ARM's version. */
+
+#ifndef _LOWLEVELLOCK_H
+#define _LOWLEVELLOCK_H 1
+
+#include <time.h>
+#include <sys/param.h>
+#include <bits/pthreadtypes.h>
+#include <atomic.h>
+#include <kernel-features.h>
+
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_REQUEUE 3
+#define FUTEX_CMP_REQUEUE 4
+#define FUTEX_WAKE_OP 5
+#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE ((4 << 24) | 1)
+#define FUTEX_LOCK_PI 6
+#define FUTEX_UNLOCK_PI 7
+#define FUTEX_TRYLOCK_PI 8
+#define FUTEX_WAIT_BITSET 9
+#define FUTEX_WAKE_BITSET 10
+#define FUTEX_PRIVATE_FLAG 128
+#define FUTEX_CLOCK_REALTIME 256
+
+#define FUTEX_BITSET_MATCH_ANY 0xffffffff
+
+/* Values for 'private' parameter of locking macros. Yes, the
+ definition seems to be backwards. But it is not. The bit will be
+ reversed before passing to the system call. */
+#define LLL_PRIVATE 0
+#define LLL_SHARED FUTEX_PRIVATE_FLAG
+
+
+#if !defined NOT_IN_libc || defined IS_IN_rtld
+/* In libc.so or ld.so all futexes are private. */
+# ifdef __ASSUME_PRIVATE_FUTEX
+# define __lll_private_flag(fl, private) \
+ ((fl) | FUTEX_PRIVATE_FLAG)
+# else
+# define __lll_private_flag(fl, private) \
+ ((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex))
+# endif
+#else
+# ifdef __ASSUME_PRIVATE_FUTEX
+# define __lll_private_flag(fl, private) \
+ (((fl) | FUTEX_PRIVATE_FLAG) ^ (private))
+# else
+# define __lll_private_flag(fl, private) \
+ (__builtin_constant_p (private) \
+ ? ((private) == 0 \
+ ? ((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex)) \
+ : (fl)) \
+ : ((fl) | (((private) ^ FUTEX_PRIVATE_FLAG) \
+ & THREAD_GETMEM (THREAD_SELF, header.private_futex))))
+# endif
+#endif
+
+
+#define lll_futex_wait(futexp, val, private) \
+ lll_futex_timed_wait(futexp, val, NULL, private)
+
+#define lll_futex_timed_wait(futexp, val, timespec, private) \
+ ({ \
+ INTERNAL_SYSCALL_DECL (__err); \
+ long int __ret; \
+ __ret = INTERNAL_SYSCALL (futex, __err, 4, (futexp), \
+ __lll_private_flag (FUTEX_WAIT, private), \
+ (val), (timespec)); \
+ __ret; \
+ })
+
+#define lll_futex_wake(futexp, nr, private) \
+ ({ \
+ INTERNAL_SYSCALL_DECL (__err); \
+ long int __ret; \
+ __ret = INTERNAL_SYSCALL (futex, __err, 4, (futexp), \
+ __lll_private_flag (FUTEX_WAKE, private), \
+ (nr), 0); \
+ __ret; \
+ })
+
+#define lll_robust_dead(futexv, private) \
+ do \
+ { \
+ int *__futexp = &(futexv); \
+ atomic_or (__futexp, FUTEX_OWNER_DIED); \
+ lll_futex_wake (__futexp, 1, private); \
+ } \
+ while (0)
+
+/* Returns non-zero if error happened, zero if success. */
+#define lll_futex_requeue(futexp, nr_wake, nr_move, mutex, val, private) \
+ ({ \
+ INTERNAL_SYSCALL_DECL (__err); \
+ long int __ret; \
+ __ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp), \
+ __lll_private_flag (FUTEX_CMP_REQUEUE, private),\
+ (nr_wake), (nr_move), (mutex), (val)); \
+ INTERNAL_SYSCALL_ERROR_P (__ret, __err); \
+ })
+
+/* Returns non-zero if error happened, zero if success. */
+#define lll_futex_wake_unlock(futexp, nr_wake, nr_wake2, futexp2, private) \
+ ({ \
+ INTERNAL_SYSCALL_DECL (__err); \
+ long int __ret; \
+ __ret = INTERNAL_SYSCALL (futex, __err, 6, (futexp), \
+ __lll_private_flag (FUTEX_WAKE_OP, private), \
+ (nr_wake), (nr_wake2), (futexp2), \
+ FUTEX_OP_CLEAR_WAKE_IF_GT_ONE); \
+ INTERNAL_SYSCALL_ERROR_P (__ret, __err); \
+ })
+
+#define lll_trylock(lock) \
+ atomic_compare_and_exchange_val_acq (&(lock), 1, 0)
+
+#define lll_cond_trylock(lock) \
+ atomic_compare_and_exchange_val_acq (&(lock), 2, 0)
+
+#define lll_robust_trylock(lock, id) \
+ atomic_compare_and_exchange_val_acq (&(lock), id, 0)
+
+extern void __lll_lock_wait_private (int *futex) attribute_hidden;
+extern void __lll_lock_wait (int *futex, int private) attribute_hidden;
+extern int __lll_robust_lock_wait (int *futex, int private) attribute_hidden;
+
+#define __lll_lock(futex, private) \
+ ((void) ({ \
+ int *__futex = (futex); \
+ if (__builtin_expect (atomic_compare_and_exchange_val_acq (__futex, \
+ 1, 0), 0)) \
+ { \
+ if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
+ __lll_lock_wait_private (__futex); \
+ else \
+ __lll_lock_wait (__futex, private); \
+ } \
+ }))
+#define lll_lock(futex, private) __lll_lock (&(futex), private)
+
+
+#define __lll_robust_lock(futex, id, private) \
+ ({ \
+ int *__futex = (futex); \
+ int __val = 0; \
+ \
+ if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id, \
+ 0), 0)) \
+ __val = __lll_robust_lock_wait (__futex, private); \
+ __val; \
+ })
+#define lll_robust_lock(futex, id, private) \
+ __lll_robust_lock (&(futex), id, private)
+
+
+#define __lll_cond_lock(futex, private) \
+ ((void) ({ \
+ int *__futex = (futex); \
+ if (__builtin_expect (atomic_exchange_acq (__futex, 2), 0)) \
+ __lll_lock_wait (__futex, private); \
+ }))
+#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
+
+
+#define lll_robust_cond_lock(futex, id, private) \
+ __lll_robust_lock (&(futex), (id) | FUTEX_WAITERS, private)
+
+
+extern int __lll_timedlock_wait (int *futex, const struct timespec *,
+ int private) attribute_hidden;
+extern int __lll_robust_timedlock_wait (int *futex, const struct timespec *,
+ int private) attribute_hidden;
+
+#define __lll_timedlock(futex, abstime, private) \
+ ({ \
+ int *__futex = (futex); \
+ int __val = 0; \
+ \
+ if (__builtin_expect (atomic_exchange_acq (__futex, 1), 0)) \
+ __val = __lll_timedlock_wait (__futex, abstime, private); \
+ __val; \
+ })
+#define lll_timedlock(futex, abstime, private) \
+ __lll_timedlock (&(futex), abstime, private)
+
+
+#define __lll_robust_timedlock(futex, abstime, id, private) \
+ ({ \
+ int *__futex = (futex); \
+ int __val = 0; \
+ \
+ if (__builtin_expect (atomic_compare_and_exchange_bool_acq (__futex, id, \
+ 0), 0)) \
+ __val = __lll_robust_timedlock_wait (__futex, abstime, private); \
+ __val; \
+ })
+#define lll_robust_timedlock(futex, abstime, id, private) \
+ __lll_robust_timedlock (&(futex), abstime, id, private)
+
+
+#define __lll_unlock(futex, private) \
+ (void) \
+ ({ int *__futex = (futex); \
+ int __oldval = atomic_exchange_rel (__futex, 0); \
+ if (__builtin_expect (__oldval > 1, 0)) \
+ lll_futex_wake (__futex, 1, private); \
+ })
+#define lll_unlock(futex, private) __lll_unlock(&(futex), private)
+
+
+#define __lll_robust_unlock(futex, private) \
+ (void) \
+ ({ int *__futex = (futex); \
+ int __oldval = atomic_exchange_rel (__futex, 0); \
+ if (__builtin_expect (__oldval & FUTEX_WAITERS, 0)) \
+ lll_futex_wake (__futex, 1, private); \
+ })
+#define lll_robust_unlock(futex, private) \
+ __lll_robust_unlock(&(futex), private)
+
+
+#define lll_islocked(futex) \
+ (futex != 0)
+
+
+/* Our internal lock implementation is identical to the binary-compatible
+ mutex implementation. */
+
+/* Initializers for lock. */
+#define LLL_LOCK_INITIALIZER (0)
+#define LLL_LOCK_INITIALIZER_LOCKED (1)
+
+/* The states of a lock are:
+ 0 - untaken
+ 1 - taken by one user
+ >1 - taken by more users */
+
+/* The kernel notifies a process which uses CLONE_CLEARTID via futex
+ wakeup when the clone terminates. The memory location contains the
+ thread ID while the clone is running and is reset to zero
+ afterwards. */
+#define lll_wait_tid(tid) \
+ do { \
+ __typeof (tid) __tid; \
+ while ((__tid = (tid)) != 0) \
+ lll_futex_wait (&(tid), __tid, LLL_SHARED); \
+ } while (0)
+
+extern int __lll_timedwait_tid (int *, const struct timespec *)
+ attribute_hidden;
+
+#define lll_timedwait_tid(tid, abstime) \
+ ({ \
+ int __res = 0; \
+ if ((tid) != 0) \
+ __res = __lll_timedwait_tid (&(tid), (abstime)); \
+ __res; \
+ })
+
+#endif /* lowlevellock.h */
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/pt-vfork.S b/ports/sysdeps/unix/sysv/linux/m68k/nptl/pt-vfork.S
new file mode 100644
index 000000000..639d5ed2c
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/pt-vfork.S
@@ -0,0 +1,36 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <tcb-offsets.h>
+
+#define SAVE_PID \
+ bsrl __m68k_read_tp@PLTPC ; /* Get the thread pointer. */ \
+ movel %a0, %a1 ; /* Save TP for RESTORE_PID. */ \
+ movel PID_OFFSET(%a1), %d0 ; /* Get the PID. */ \
+ movel %d0, %d1 ; /* Save PID for RESTORE_PID. */ \
+ negl %d0 ; /* Negate the PID. */ \
+ movel %d0, PID_OFFSET(%a1) ; /* Store the temporary PID. */
+
+#define RESTORE_PID \
+ tstl %d0 ; \
+ beq 1f ; /* If we are the parent... */ \
+ movel %d1, PID_OFFSET(%a1) ; /* Restore the PID. */ \
+1:
+
+#include <sysdeps/unix/sysv/linux/m68k/vfork.S>
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/pthread_once.c b/ports/sysdeps/unix/sysv/linux/m68k/nptl/pthread_once.c
new file mode 100644
index 000000000..415045f51
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/pthread_once.c
@@ -0,0 +1,91 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "pthreadP.h"
+#include <lowlevellock.h>
+
+unsigned long int __fork_generation attribute_hidden;
+
+static void
+clear_once_control (void *arg)
+{
+ pthread_once_t *once_control = (pthread_once_t *) arg;
+
+ *once_control = 0;
+ lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
+}
+
+int
+__pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
+{
+ for (;;)
+ {
+ int oldval;
+ int newval;
+
+ /* Pseudo code:
+ newval = __fork_generation | 1;
+ oldval = *once_control;
+ if ((oldval & 2) == 0)
+ *once_control = newval;
+ Do this atomically.
+ */
+ do
+ {
+ newval = __fork_generation | 1;
+ oldval = *once_control;
+ if (oldval & 2)
+ break;
+ } while (atomic_compare_and_exchange_val_acq (once_control, newval, oldval) != oldval);
+
+ /* Check if the initializer has already been done. */
+ if ((oldval & 2) != 0)
+ return 0;
+
+ /* Check if another thread already runs the initializer. */
+ if ((oldval & 1) == 0)
+ break;
+
+ /* Check whether the initializer execution was interrupted by a fork. */
+ if (oldval != newval)
+ break;
+
+ /* Same generation, some other thread was faster. Wait. */
+ lll_futex_wait (once_control, oldval, LLL_PRIVATE);
+ }
+
+ /* This thread is the first here. Do the initialization.
+ Register a cleanup handler so that in case the thread gets
+ interrupted the initialization can be restarted. */
+ pthread_cleanup_push (clear_once_control, once_control);
+
+ init_routine ();
+
+ pthread_cleanup_pop (0);
+
+ /* Say that the initialisation is done. */
+ *once_control = __fork_generation | 2;
+
+ /* Wake up all other threads. */
+ lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
+
+ return 0;
+}
+weak_alias (__pthread_once, pthread_once)
+strong_alias (__pthread_once, __pthread_once_internal)
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/sysdep-cancel.h b/ports/sysdeps/unix/sysv/linux/m68k/nptl/sysdep-cancel.h
new file mode 100644
index 000000000..87e2d55ee
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/sysdep-cancel.h
@@ -0,0 +1,141 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <tls.h>
+#ifndef __ASSEMBLER__
+# include <nptl/pthreadP.h>
+#endif
+
+#if !defined NOT_IN_libc || defined IS_IN_libpthread || defined IS_IN_librt
+
+# undef PSEUDO
+# define PSEUDO(name, syscall_name, args) \
+ .text; \
+ ENTRY (name) \
+ SINGLE_THREAD_P; \
+ jne .Lpseudo_cancel; \
+ .type __##syscall_name##_nocancel,@function; \
+ .globl __##syscall_name##_nocancel; \
+ __##syscall_name##_nocancel: \
+ DO_CALL (syscall_name, args); \
+ cmp.l &-4095, %d0; \
+ jcc SYSCALL_ERROR_LABEL; \
+ rts; \
+ .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
+ .Lpseudo_cancel: \
+ cfi_startproc; \
+ CENABLE; \
+ DOCARGS_##args \
+ move.l %d0, -(%sp); /* Save result of CENABLE. */ \
+ cfi_adjust_cfa_offset (4); \
+ move.l &SYS_ify (syscall_name), %d0; \
+ trap &0; \
+ move.l %d0, %d2; \
+ CDISABLE; \
+ addq.l &4, %sp; /* Remove result of CENABLE from the stack. */ \
+ cfi_adjust_cfa_offset (-4); \
+ move.l %d2, %d0; \
+ UNDOCARGS_##args \
+ cmp.l &-4095, %d0; \
+ jcc SYSCALL_ERROR_LABEL; \
+ cfi_endproc
+
+/* Note: we use D2 to save syscall's return value as D0 will be clobbered in
+ CDISABLE. */
+# define DOCARGS_0 move.l %d2, -(%sp); \
+ cfi_adjust_cfa_offset (4); cfi_rel_offset (%d2, 0);
+# define UNDOCARGS_0 move.l (%sp)+, %d2; \
+ cfi_adjust_cfa_offset (-4); cfi_restore (%d2);
+
+# define DOCARGS_1 _DOCARGS_1 (4); DOCARGS_0
+# define _DOCARGS_1(n) move.l n(%sp), %d1;
+# define UNDOCARGS_1 UNDOCARGS_0
+
+# define DOCARGS_2 _DOCARGS_2 (8)
+# define _DOCARGS_2(n) DOCARGS_0 move.l n+4(%sp), %d2; _DOCARGS_1 (n)
+# define UNDOCARGS_2 UNDOCARGS_0
+
+/* TODO: We can optimize DOCARGS_{3, 4} by saving registers to a0 and a1
+ instead of pushing them on stack. */
+# define DOCARGS_3 _DOCARGS_3 (12)
+# define _DOCARGS_3(n) move.l %d3, -(%sp); \
+ cfi_adjust_cfa_offset (4); cfi_rel_offset (%d3, 0); \
+ move.l n+4(%sp), %d3; _DOCARGS_2 (n)
+# define UNDOCARGS_3 UNDOCARGS_2 move.l (%sp)+, %d3; \
+ cfi_adjust_cfa_offset (-4); cfi_restore (%d3);
+
+# define DOCARGS_4 _DOCARGS_4 (16)
+# define _DOCARGS_4(n) move.l %d4, -(%sp); \
+ cfi_adjust_cfa_offset (4); cfi_rel_offset (%d4, 0); \
+ move.l n+4(%sp), %d4; _DOCARGS_3 (n)
+# define UNDOCARGS_4 UNDOCARGS_3 move.l (%sp)+, %d4; \
+ cfi_adjust_cfa_offset (-4); cfi_restore (%d4);
+
+# define DOCARGS_5 _DOCARGS_5 (20)
+# define _DOCARGS_5(n) move.l %d5, %a1; cfi_register (%d5, a1); \
+ move.l n(%sp), %d5; _DOCARGS_4 (n-4)
+# define UNDOCARGS_5 UNDOCARGS_4 move.l %a1, %d5; cfi_restore (%d5);
+
+# define DOCARGS_6 _DOCARGS_6 (24)
+# define _DOCARGS_6(n) move.l n(%sp), %a0; _DOCARGS_5 (n-4)
+# define UNDOCARGS_6 UNDOCARGS_5
+
+# ifdef PIC
+# define PSEUDO_JMP(sym) jbsr sym ## @PLTPC
+# else
+# define PSEUDO_JMP(sym) jbsr sym
+# endif
+
+# ifdef IS_IN_libpthread
+# define CENABLE PSEUDO_JMP (__pthread_enable_asynccancel)
+# define CDISABLE PSEUDO_JMP (__pthread_disable_asynccancel)
+# elif !defined NOT_IN_libc
+# define CENABLE PSEUDO_JMP (__libc_enable_asynccancel)
+# define CDISABLE PSEUDO_JMP (__libc_disable_asynccancel)
+# elif defined IS_IN_librt
+# define CENABLE PSEUDO_JMP (__librt_enable_asynccancel)
+# define CDISABLE PSEUDO_JMP (__librt_disable_asynccancel)
+# else
+# error Unsupported library
+# endif
+
+# ifndef __ASSEMBLER__
+# define SINGLE_THREAD_P \
+ __builtin_expect (THREAD_GETMEM (THREAD_SELF, \
+ header.multiple_threads) == 0, 1)
+# else
+# define SINGLE_THREAD_P \
+ PSEUDO_JMP (__m68k_read_tp); \
+ tst.l MULTIPLE_THREADS_OFFSET(%a0)
+# endif
+
+#elif !defined __ASSEMBLER__
+
+# define SINGLE_THREAD_P (1)
+# define NO_CANCELLATION (1)
+
+#endif
+
+#ifndef __ASSEMBLER__
+# define RTLD_SINGLE_THREAD_P \
+ __builtin_expect (THREAD_GETMEM (THREAD_SELF, \
+ header.multiple_threads) == 0, \
+ 1)
+#endif
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/nptl/vfork.S b/ports/sysdeps/unix/sysv/linux/m68k/nptl/vfork.S
new file mode 100644
index 000000000..95e37642f
--- /dev/null
+++ b/ports/sysdeps/unix/sysv/linux/m68k/nptl/vfork.S
@@ -0,0 +1,38 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Maxim Kuvyrkov <maxim@codesourcery.com>, 2010.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <tcb-offsets.h>
+
+#define SAVE_PID \
+ bsrl __m68k_read_tp@PLTPC ; /* Get the thread pointer. */ \
+ movel %a0, %a1 ; /* Save TP for RESTORE_PID. */ \
+ movel PID_OFFSET(%a1), %d0 ; /* Get the PID. */ \
+ movel %d0, %d1 ; /* Save PID for RESTORE_PID. */ \
+ negl %d0 ; /* Negate the PID. */ \
+ bne 1f ; /* If it was zero... */ \
+ movel #0x80000000, %d0 ; /* use 0x80000000 instead. */ \
+1: movel %d0, PID_OFFSET(%a1) ; /* Store the temporary PID. */
+
+#define RESTORE_PID \
+ tstl %d0 ; \
+ beq 1f ; /* If we are the parent... */ \
+ movel %d1, PID_OFFSET(%a1) ; /* Restore the PID. */ \
+1:
+
+#include <sysdeps/unix/sysv/linux/m68k/vfork.S>
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/register-dump.h b/ports/sysdeps/unix/sysv/linux/m68k/register-dump.h
index 391902f49..78709d9cc 100644
--- a/ports/sysdeps/unix/sysv/linux/m68k/register-dump.h
+++ b/ports/sysdeps/unix/sysv/linux/m68k/register-dump.h
@@ -40,7 +40,6 @@
*/
-#ifndef __mcoldfire__
/* Linux saves only the call-clobbered registers in the sigcontext. We
need to use a trampoline that saves the rest so that the C code can
access them. We use the sc_fpstate field, since the handler is not
@@ -59,14 +58,17 @@ catch_segfault:\n\
/* Clear the first 4 bytes to make it a null fp state, just\n\
in case the handler does return. */\n\
clr.l (%%a0)+\n\
- movem.l %%d2-%%d7/%%a2-%%a6,(%%a0)\n\
- fmovem.x %%fp2-%%fp7,11*4(%%a0)\n\
- jra real_catch_segfault"
+ movem.l %%d2-%%d7/%%a2-%%a6,(%%a0)\n"
+#ifndef __mcoldfire__
+ "fmovem.x %%fp2-%%fp7,11*4(%%a0)\n"
+#elif defined __mcffpu__
+ "fmovem.d %%fp2-%%fp7,11*4(%%a0)\n"
+#endif
+ "jra real_catch_segfault"
: : "n" (offsetof (struct sigcontext, sc_fpstate)));
}
#define catch_segfault(a,b) \
__attribute_used__ real_catch_segfault(a,b)
-#endif
static void
hexvalue (unsigned long int value, char *buf, size_t len)
@@ -104,36 +106,19 @@ register_dump (int fd, struct sigcontext *ctx)
/* Generate strings of register contents. */
hexvalue (ctx->sc_d0, regs[0], 8);
hexvalue (ctx->sc_d1, regs[1], 8);
-#ifdef __mcoldfire__
- hexvalue (ctx->sc_d2, regs[2], 8);
- hexvalue (ctx->sc_d3, regs[3], 8);
- hexvalue (ctx->sc_d4, regs[4], 8);
- hexvalue (ctx->sc_d5, regs[5], 8);
- hexvalue (ctx->sc_d6, regs[6], 8);
- hexvalue (ctx->sc_d7, regs[7], 8);
-#else
hexvalue (*p++, regs[2], 8);
hexvalue (*p++, regs[3], 8);
hexvalue (*p++, regs[4], 8);
hexvalue (*p++, regs[5], 8);
hexvalue (*p++, regs[6], 8);
hexvalue (*p++, regs[7], 8);
-#endif
hexvalue (ctx->sc_a0, regs[8], 8);
hexvalue (ctx->sc_a1, regs[9], 8);
-#ifdef __mcoldfire__
- hexvalue (ctx->sc_a2, regs[10], 8);
- hexvalue (ctx->sc_a3, regs[11], 8);
- hexvalue (ctx->sc_a4, regs[12], 8);
- hexvalue (ctx->sc_a5, regs[13], 8);
- hexvalue (ctx->sc_a6, regs[14], 8);
-#else
hexvalue (*p++, regs[10], 8);
hexvalue (*p++, regs[11], 8);
hexvalue (*p++, regs[12], 8);
hexvalue (*p++, regs[13], 8);
hexvalue (*p++, regs[14], 8);
-#endif
hexvalue (ctx->sc_usp, regs[15], 8);
hexvalue (ctx->sc_pc, regs[16], 8);
hexvalue (ctx->sc_sr, regs[17], 4);
@@ -142,9 +127,6 @@ register_dump (int fd, struct sigcontext *ctx)
for (i = 0; i < 2; i++)
for (j = 0; j < fpreg_size; j += 8)
hexvalue (*pfp++, fpregs[i] + j, 8);
-#ifdef __mcoldfire__
- p = pfp;
-#endif
for (i = 2; i < 8; i++)
for (j = 0; j < fpreg_size; j += 8)
hexvalue (*p++, fpregs[i] + j, 8);
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/socket.S b/ports/sysdeps/unix/sysv/linux/m68k/socket.S
index 2690f182a..147a3b2d4 100644
--- a/ports/sysdeps/unix/sysv/linux/m68k/socket.S
+++ b/ports/sysdeps/unix/sysv/linux/m68k/socket.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -41,8 +41,11 @@
.globl __socket
ENTRY (__socket)
-#if defined NEED_CANCELLATION && defined CENABLE
- SINGLE_THREAD_P (%a0)
+#ifdef NEED_CANCELLATION
+# if !defined CENABLE || !defined CDISABLE
+# error CENABLE and/or CDISABLE is not defined
+# endif
+ SINGLE_THREAD_P
jne 1f
#endif
@@ -69,21 +72,26 @@ ENTRY (__socket)
/* Successful; return the syscall's value. */
rts
-#if defined NEED_CANCELLATION && defined CENABLE
-1: /* Enable asynchronous cancellation. */
+#ifdef NEED_CANCELLATION
+1: cfi_startproc
+ /* Enable asynchronous cancellation. */
CENABLE
- /* Save registers. */
+ /* Save D2. */
move.l %d2, -(%sp)
- move.l %d0, -(%sp)
+ cfi_adjust_cfa_offset (4)
+ cfi_rel_offset (%d2, 0)
- move.l #SYS_ify (socketcall), %d0 /* System call number in %d0. */
+ /* Save the result of CENABLE. */
+ move.l %d0, -(%sp)
+ cfi_adjust_cfa_offset (4)
/* Use ## so `socket' is a separate token that might be #define'd. */
move.l #P (SOCKOP_,socket), %d1 /* Subcode is first arg to syscall. */
lea 4+8(%sp), %a1 /* Address of args is 2nd arg. */
move.l %a1, %d2
+ move.l #SYS_ify (socketcall), %d0 /* System call number in %d0. */
/* Do the system call trap. */
trap #0
@@ -91,14 +99,18 @@ ENTRY (__socket)
move.l %d0, %d2
CDISABLE
addq.l #4, %sp
+ cfi_adjust_cfa_offset (-4)
move.l %d2, %d0
/* Restore registers. */
move.l (%sp)+, %d2
+ cfi_adjust_cfa_offset (-4)
+ cfi_restore (%d2)
/* %d0 is < 0 if there was an error. */
tst.l %d0
jmi SYSCALL_ERROR_LABEL
+ cfi_endproc
/* Successful; return the syscall's value. */
rts
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/sysdep.h b/ports/sysdeps/unix/sysv/linux/m68k/sysdep.h
index 12687d805..8bd188b0b 100644
--- a/ports/sysdeps/unix/sysv/linux/m68k/sysdep.h
+++ b/ports/sysdeps/unix/sysv/linux/m68k/sysdep.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 1996, 1997, 1998, 2000, 2003, 2004, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998, 2000, 2003, 2004, 2006, 2010
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>,
December 1995.
@@ -23,6 +24,7 @@
#include <sysdeps/unix/sysdep.h>
#include <sysdeps/m68k/sysdep.h>
+#include <tls.h>
/* Defines RTLD_PRIVATE_ERRNO. */
#include <dl-sysdep.h>
@@ -109,10 +111,27 @@ SYSCALL_ERROR_LABEL: \
a pointer (e.g., mmap). */ \
move.l %d0, %a0; \
rts;
-# else /* !RTLD_PRIVATE_ERRNO */
-/* Store (- %d0) into errno through the GOT. */
-# if defined _LIBC_REENTRANT
-# define SYSCALL_ERROR_HANDLER \
+# elif USE___THREAD
+# ifndef NOT_IN_libc
+# define SYSCALL_ERROR_ERRNO __libc_errno
+# else
+# define SYSCALL_ERROR_ERRNO errno
+# endif
+# define SYSCALL_ERROR_HANDLER \
+SYSCALL_ERROR_LABEL: \
+ neg.l %d0; \
+ move.l %d0, -(%sp); \
+ jbsr __m68k_read_tp@PLTPC; \
+ lea (_GLOBAL_OFFSET_TABLE_@GOTPC, %pc), %a1; \
+ add.l (SYSCALL_ERROR_ERRNO@TLSIE, %a1), %a0; \
+ move.l (%sp)+, (%a0); \
+ move.l &-1, %d0; \
+ /* Copy return value to %a0 for syscalls that are declared to return \
+ a pointer (e.g., mmap). */ \
+ move.l %d0, %a0; \
+ rts;
+# elif defined _LIBC_REENTRANT
+# define SYSCALL_ERROR_HANDLER \
SYSCALL_ERROR_LABEL: \
neg.l %d0; \
move.l %d0, -(%sp); \
@@ -123,8 +142,9 @@ SYSCALL_ERROR_LABEL: \
a pointer (e.g., mmap). */ \
move.l %d0, %a0; \
rts;
-# else /* !_LIBC_REENTRANT */
-# define SYSCALL_ERROR_HANDLER \
+# else /* !_LIBC_REENTRANT */
+/* Store (- %d0) into errno through the GOT. */
+# define SYSCALL_ERROR_HANDLER \
SYSCALL_ERROR_LABEL: \
move.l (errno@GOTPC, %pc), %a0; \
neg.l %d0; \
@@ -134,8 +154,7 @@ SYSCALL_ERROR_LABEL: \
a pointer (e.g., mmap). */ \
move.l %d0, %a0; \
rts;
-# endif /* _LIBC_REENTRANT */
-# endif /* RTLD_PRIVATE_ERRNO */
+# endif /* _LIBC_REENTRANT */
#else
# define SYSCALL_ERROR_HANDLER /* Nothing here; code in sysdep.S is used. */
#endif /* PIC */
@@ -148,9 +167,11 @@ SYSCALL_ERROR_LABEL: \
arg 3 %d3 call-saved
arg 4 %d4 call-saved
arg 5 %d5 call-saved
+ arg 6 %a0 call-clobbered
The stack layout upon entering the function is:
+ 24(%sp) Arg# 6
20(%sp) Arg# 5
16(%sp) Arg# 4
12(%sp) Arg# 3
@@ -229,7 +250,7 @@ SYSCALL_ERROR_LABEL: \
normally. It will never touch errno. This returns just what the kernel
gave back. */
#undef INTERNAL_SYSCALL
-#define INTERNAL_SYSCALL(name, err, nr, args...) \
+#define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ unsigned int _sys_result; \
{ \
/* Load argument values in temporary variables
@@ -237,7 +258,7 @@ SYSCALL_ERROR_LABEL: \
before the call used registers are set. */ \
LOAD_ARGS_##nr (args) \
LOAD_REGS_##nr \
- register int _d0 asm ("%d0") = __NR_##name; \
+ register int _d0 asm ("%d0") = name; \
asm volatile ("trap #0" \
: "=d" (_d0) \
: "0" (_d0) ASM_ARGS_##nr \
@@ -245,6 +266,8 @@ SYSCALL_ERROR_LABEL: \
_sys_result = _d0; \
} \
(int) _sys_result; })
+#define INTERNAL_SYSCALL(name, err, nr, args...) \
+ INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
#undef INTERNAL_SYSCALL_ERROR_P
#define INTERNAL_SYSCALL_ERROR_P(val, err) \
@@ -300,4 +323,15 @@ SYSCALL_ERROR_LABEL: \
#define ASM_ARGS_6 ASM_ARGS_5, "a" (_a0)
#endif /* not __ASSEMBLER__ */
+
+/* Pointer mangling is not yet supported for M68K. */
+#define PTR_MANGLE(var) (void) (var)
+#define PTR_DEMANGLE(var) (void) (var)
+
+#if defined NEED_DL_SYSINFO || defined NEED_DL_SYSINFO_DSO
+/* M68K needs system-supplied DSO to access TLS helpers
+ even when statically linked. */
+# define NEED_STATIC_SYSINFO_DSO 1
+#endif
+
#endif
diff --git a/ports/sysdeps/unix/sysv/linux/m68k/vfork.S b/ports/sysdeps/unix/sysv/linux/m68k/vfork.S
index 8027b2f80..4def7e3b4 100644
--- a/ports/sysdeps/unix/sysv/linux/m68k/vfork.S
+++ b/ports/sysdeps/unix/sysv/linux/m68k/vfork.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 1999, 2002, 2003, 2010 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Schwab <schwab@gnu.org>.
@@ -22,6 +22,14 @@
#include <bits/errno.h>
#include <kernel-features.h>
+#ifndef SAVE_PID
+#define SAVE_PID
+#endif
+
+#ifndef RESTORE_PID
+#define RESTORE_PID
+#endif
+
/* Clone the calling process, but without copying the whole address space.
The calling process is suspended until the new process exits or is
replaced by a call to `execve'. Return -1 for errors, 0 to the new process,
@@ -31,12 +39,20 @@ ENTRY (__vfork)
#ifdef __NR_vfork
+ /* SAVE_PID clobbers call-clobbered registers and
+ saves data in D1 and A1. */
+
+ SAVE_PID
+
/* Pop the return PC value into A0. */
movel %sp@+, %a0
/* Stuff the syscall number in D0 and trap into the kernel. */
movel #SYS_ify (vfork), %d0
trap #0
+
+ RESTORE_PID
+
tstl %d0
jmi .Lerror /* Branch forward if it failed. */