author     Liubov Dmitrieva <liubov.dmitrieva@intel.com>  2013-05-24 13:18:17 +0400
committer  Liubov Dmitrieva <ldmitrie@sourceware.org>     2013-10-23 19:07:38 +0400
commit     01d5454d13d2c21b9a08b28441d37a7ddce089a6 (patch)
tree       dfda1fe689b9fe448aa9a41725c50a7bad3e27e4
parent     c57d11da52265f7ae5368669f8340f31818b6474 (diff)
download   glibc-01d5454d13d2c21b9a08b28441d37a7ddce089a6.tar.gz
Implemented bounds-check support (Intel MPX) for string/memory routines for x86_32.
Warning: not complete and not tested yet.
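For readers unfamiliar with Intel MPX, the instrumentation added throughout this patch follows a single pattern: reload the bounds recorded for a pointer argument, then check the pointer against its lower and upper bound before the first dereference. A minimal sketch of that pattern (the bound register, pointer register, and argument slot vary per routine, and the checks are assembled only when the compiler's pointer checker defines __CHKP__):

#ifdef __CHKP__
	bndldx	SRC(%esp,%ecx,1), %bnd1	/* load bounds for the pointer in %ecx */
	bndcl	(%ecx), %bnd1		/* raise #BR if %ecx is below the lower bound */
	bndcu	(%ecx), %bnd1		/* raise #BR if %ecx is above the upper bound */
#endif

Unrolled loops then re-issue only the upper-bound check at each new offset (e.g. "bndcu 4(%edx), %bnd0"), since the pointers only move upward.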
-rw-r--r--  sysdeps/i386/i486/strcat.S | 79
-rw-r--r--  sysdeps/i386/i586/strchr.S | 31
-rw-r--r--  sysdeps/i386/i586/strcpy.S | 7
-rw-r--r--  sysdeps/i386/i586/strlen.S | 22
-rw-r--r--  sysdeps/i386/i686/memcmp.S | 172
-rw-r--r--  sysdeps/i386/i686/memset.S | 5
-rw-r--r--  sysdeps/i386/i686/multiarch/Makefile | 7
-rw-r--r--  sysdeps/i386/i686/multiarch/Versions | 7
-rw-r--r--  sysdeps/i386/i686/multiarch/__bcopy.S (renamed from sysdeps/i386/i686/multiarch/bcopy.S) | 0
-rw-r--r--  sysdeps/i386/i686/multiarch/__memcpy.S (renamed from sysdeps/i386/i686/multiarch/memcpy.S) | 0
-rw-r--r--  sysdeps/i386/i686/multiarch/__memcpy_chk.S (renamed from sysdeps/i386/i686/multiarch/memcpy_chk.S) | 0
-rw-r--r--  sysdeps/i386/i686/multiarch/__memmove.S (renamed from sysdeps/i386/i686/multiarch/memmove.S) | 0
-rw-r--r--  sysdeps/i386/i686/multiarch/__memmove_chk.S (renamed from sysdeps/i386/i686/multiarch/memmove_chk.S) | 0
-rw-r--r--  sysdeps/i386/i686/multiarch/__mempcpy.S (renamed from sysdeps/i386/i686/multiarch/mempcpy.S) | 0
-rw-r--r--  sysdeps/i386/i686/multiarch/__mempcpy_chk.S (renamed from sysdeps/i386/i686/multiarch/mempcpy_chk.S) | 0
-rw-r--r--  sysdeps/i386/i686/multiarch/bcopy.c | 8
-rw-r--r--  sysdeps/i386/i686/multiarch/ifunc-impl-list.c | 86
-rw-r--r--  sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S | 82
-rw-r--r--  sysdeps/i386/i686/multiarch/memcmp-sse4.S | 53
-rw-r--r--  sysdeps/i386/i686/multiarch/memcpy.c | 40
-rw-r--r--  sysdeps/i386/i686/multiarch/memcpy_chk.c | 1
-rw-r--r--  sysdeps/i386/i686/multiarch/memmove.c | 76
-rw-r--r--  sysdeps/i386/i686/multiarch/memmove_chk.c | 1
-rw-r--r--  sysdeps/i386/i686/multiarch/mempcpy.c | 40
-rw-r--r--  sysdeps/i386/i686/multiarch/mempcpy_chk.c | 1
-rw-r--r--  sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S | 6
-rw-r--r--  sysdeps/i386/i686/multiarch/memset-sse2-rep.S | 6
-rw-r--r--  sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S | 1803
-rw-r--r--  sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S | 3
-rw-r--r--  sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S | 3
-rw-r--r--  sysdeps/i386/i686/multiarch/strcat-sse2.S | 186
-rw-r--r--  sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S | 24
-rw-r--r--  sysdeps/i386/i686/multiarch/strcmp-sse4.S | 78
-rw-r--r--  sysdeps/i386/i686/multiarch/strcpy-sse2.S | 380
-rw-r--r--  sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S | 29
-rw-r--r--  sysdeps/i386/i686/multiarch/strlen-sse2.S | 67
-rw-r--r--  sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S | 30
-rw-r--r--  sysdeps/i386/i686/multiarch/wcschr-sse2.S | 45
-rw-r--r--  sysdeps/i386/i686/multiarch/wcscmp-sse2.S | 8
-rw-r--r--  sysdeps/i386/i686/multiarch/wcscpy-ssse3.S | 181
-rw-r--r--  sysdeps/i386/i686/multiarch/wcslen-sse2.S | 57
-rw-r--r--  sysdeps/i386/i686/multiarch/wcsrchr-sse2.S | 23
-rw-r--r--  sysdeps/i386/i686/strcmp.S | 15
-rw-r--r--  sysdeps/i386/i686/strtok.S | 8
-rw-r--r--  sysdeps/i386/memchr.S | 47
-rw-r--r--  sysdeps/i386/memcmp.S | 14
-rw-r--r--  sysdeps/i386/rawmemchr.S | 32
-rw-r--r--  sysdeps/i386/stpncpy.S | 8
-rw-r--r--  sysdeps/i386/strchrnul.S | 35
-rw-r--r--  sysdeps/i386/strcspn.S | 8
-rw-r--r--  sysdeps/i386/strpbrk.S | 8
-rw-r--r--  sysdeps/i386/strrchr.S | 28
-rw-r--r--  sysdeps/i386/strtok.S | 11
53 files changed, 3794 insertions, 67 deletions
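A note on the bndldx addressing form that recurs below: bndldx looks bounds up in the MPX bound table, keyed by the effective address of its memory operand (here, the argument's slot on the stack), and the table entry is matched against the pointer value held in the index register (on a mismatch the instruction loads permissive bounds instead of faulting). This assumes the caller stashed the bounds with a matching bndstx. A hypothetical caller-side sketch, assuming the pointer being passed is in %eax and was just stored at (%esp):

	bndstx	%bnd0, (%esp,%eax,1)	/* record bounds for the argument, keyed by its stack slot */
	call	strlen			/* the callee's "bndldx STR(%esp,%eax,1), %bnd0" recovers them */

(The STR offset in the callee accounts for the return address pushed by the call.)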
diff --git a/sysdeps/i386/i486/strcat.S b/sysdeps/i386/i486/strcat.S
index 7d45862a2a..af2602efd6 100644
--- a/sysdeps/i386/i486/strcat.S
+++ b/sysdeps/i386/i486/strcat.S
@@ -35,9 +35,19 @@ ENTRY (strcat)
movl DEST(%esp), %edx
movl SRC(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndldx SRC(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+#endif
testb $0xff, (%ecx) /* Is source string empty? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+#endif
/* Test the first bytes separately until destination is aligned. */
testl $3, %edx /* destination pointer aligned? */
@@ -66,7 +76,11 @@ ENTRY (strcat)
L(4): addl $16,%edx /* increment destination pointer for round */
-L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+#endif
+ movl (%edx), %eax /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
/* If you compare this with the algorithm in memchr.S you will
@@ -98,6 +112,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
/* If at least one byte of the word is C we don't get 0 in %ecx. */
jnz L(3)
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+#endif
movl 4(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -110,6 +127,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(5) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+#endif
movl 8(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -122,6 +142,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+#endif
movl 12(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -155,6 +178,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
/* Now we have to align the source pointer. */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -163,6 +190,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -171,6 +202,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -182,10 +217,18 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
ALIGN(4)
-L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */
+L(28):
+#ifdef __CHKP__
+ bndcu 12(%ecx, %edx), %bnd0
+#endif
+ movl %eax, 12(%ecx,%edx)/* store word at destination */
addl $16, %ecx /* adjust pointer for full round */
-L(29): movl (%ecx), %eax /* get word from source */
+L(29):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+#endif
+ movl (%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
carry bits reported for each byte which
@@ -196,8 +239,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(9) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
movl %eax, (%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+#endif
movl 4(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -209,8 +258,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(91) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 4(%ecx, %edx), %bnd0
+#endif
movl %eax, 4(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+#endif
movl 8(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -222,8 +277,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(92) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%ecx, %edx), %bnd0
+#endif
movl %eax, 8(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+#endif
movl 12(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -240,15 +301,25 @@ L(93): addl $4, %ecx /* adjust pointer */
L(92): addl $4, %ecx
L(91): addl $4, %ecx
-L(9): movb %al, (%ecx,%edx) /* store first byte of last word */
+L(9):
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
+ movb %al, (%ecx,%edx) /* store first byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcu 1(%ecx, %edx), %bnd0
+#endif
movb %ah, 1(%ecx,%edx) /* store second byte of last word */
orb %ah, %ah /* is it NUL? */
jz L(8) /* yes => return */
shrl $16, %eax /* make upper bytes accessible */
+#ifdef __CHKP__
+ bndcu 2(%ecx, %edx), %bnd0
+#endif
movb %al, 2(%ecx,%edx) /* store third byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
diff --git a/sysdeps/i386/i586/strchr.S b/sysdeps/i386/i586/strchr.S
index 648d52830d..4efa935771 100644
--- a/sysdeps/i386/i586/strchr.S
+++ b/sysdeps/i386/i586/strchr.S
@@ -54,6 +54,10 @@ ENTRY (strchr)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
movl %eax, %edi /* duplicate string pointer for later */
cfi_rel_offset (edi, 12)
@@ -83,6 +87,9 @@ ENTRY (strchr)
xorb %dl, %cl /* load single byte and test for NUL */
je L(3) /* yes => return NULL */
+#ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+#endif
movb 1(%eax), %cl /* load single byte */
incl %eax
@@ -97,7 +104,11 @@ ENTRY (strchr)
jne L(11)
-L(0): movb (%eax), %cl /* load single byte */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movb (%eax), %cl /* load single byte */
cmpb %cl, %dl /* is byte == C? */
je L(out) /* aligned => return pointer */
@@ -115,7 +126,11 @@ L(0): movb (%eax), %cl /* load single byte */
four instruction up to `L1' will not be executed in the loop
because the same code is found at the end of the loop, but
there it is executed in parallel with other instructions. */
-L(11): movl (%eax), %ecx
+L(11):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx
movl $magic, %ebp
movl $magic, %edi
@@ -159,6 +174,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi /* load magic value */
xorl %edx, %ebx /* clear words which are C */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi /* (word+magic) */
@@ -189,6 +207,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
@@ -219,6 +240,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
@@ -249,6 +273,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S
index c940369342..6392a8e3d4 100644
--- a/sysdeps/i386/i586/strcpy.S
+++ b/sysdeps/i386/i586/strcpy.S
@@ -45,6 +45,10 @@ ENTRY (STRCPY)
cfi_rel_offset (edi, 8)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edi,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+#endif
xorl %eax, %eax
leal -1(%esi), %ecx
@@ -61,6 +65,9 @@ ENTRY (STRCPY)
/* 0xb is the distance between 2: and 1: but we avoid writing
1f-2b because the assembler generates worse code. */
leal 0xb(%edx,%ecx,8), %ecx
+# ifdef __CHKP__
+ jmp L(1)
+# endif
#else
leal 1f(,%ecx,8), %ecx
#endif
diff --git a/sysdeps/i386/i586/strlen.S b/sysdeps/i386/i586/strlen.S
index b50fffa1fb..9034625e95 100644
--- a/sysdeps/i386/i586/strlen.S
+++ b/sysdeps/i386/i586/strlen.S
@@ -41,6 +41,10 @@
ENTRY (strlen)
movl STR(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcu (%eax),%bnd0
+#endif
movl $3, %edx /* load mask (= 3) */
andl %eax, %edx /* separate last two bits of address */
@@ -48,10 +52,16 @@ ENTRY (strlen)
jz L(1) /* aligned => start loop */
jp L(0) /* exactly two bits set */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
@@ -61,7 +71,11 @@ ENTRY (strlen)
jz L(1)
-L(0): cmpb %dh, (%eax) /* is byte NUL? */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
@@ -174,7 +188,11 @@ L(3): subl $4, %eax /* correct too early pointer increment */
incl %eax /* increment pointer */
-L(2): subl STR(%esp), %eax /* now compute the length as difference
+L(2):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ subl STR(%esp), %eax /* now compute the length as difference
between start and terminating NUL
character */
ret
diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S
index b8091a60ec..6cb03e7a7b 100644
--- a/sysdeps/i386/i686/memcmp.S
+++ b/sysdeps/i386/i686/memcmp.S
@@ -48,9 +48,19 @@ ENTRY (memcmp)
movl BLK1(%esp), %eax
movl BLK2(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+#endif
cmpl $1, %ecx
jne L(not_1)
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
movzbl (%eax), %ecx /* LEN == 1 */
cmpb (%edx), %cl
jne L(neq)
@@ -69,6 +79,12 @@ L(neq):
cfi_rel_offset (ebx, 0)
L(not_1):
jl L(bye) /* LEN == 0 */
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
pushl %esi
cfi_adjust_cfa_offset (4)
@@ -84,36 +100,64 @@ L(not_1):
ALIGN (4)
L(28bytes):
+#ifdef __CHKP__
+ bndcu -28(%esi), %bnd0
+ bndcu -28(%edx), %bnd1
+#endif
movl -28(%esi), %eax
movl -28(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(24bytes):
+#ifdef __CHKP__
+ bndcu -24(%esi), %bnd0
+ bndcu -24(%edx), %bnd1
+#endif
movl -24(%esi), %eax
movl -24(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(20bytes):
+#ifdef __CHKP__
+ bndcu -20(%esi), %bnd0
+ bndcu -20(%edx), %bnd1
+#endif
movl -20(%esi), %eax
movl -20(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(16bytes):
+#ifdef __CHKP__
+ bndcu -16(%esi), %bnd0
+ bndcu -16(%edx), %bnd1
+#endif
movl -16(%esi), %eax
movl -16(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(12bytes):
+#ifdef __CHKP__
+ bndcu -12(%esi), %bnd0
+ bndcu -12(%edx), %bnd1
+#endif
movl -12(%esi), %eax
movl -12(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(8bytes):
+#ifdef __CHKP__
+ bndcu -8(%esi), %bnd0
+ bndcu -8(%edx), %bnd1
+#endif
movl -8(%esi), %eax
movl -8(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(4bytes):
+#ifdef __CHKP__
+ bndcu -4(%esi), %bnd0
+ bndcu -4(%edx), %bnd1
+#endif
movl -4(%esi), %eax
movl -4(%edx), %ecx
cmpl %ecx, %eax
@@ -129,41 +173,73 @@ L(0bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(29bytes):
+#ifdef __CHKP__
+ bndcu -29(%esi), %bnd0
+ bndcu -29(%edx), %bnd1
+#endif
movl -29(%esi), %eax
movl -29(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(25bytes):
+#ifdef __CHKP__
+ bndcu -25(%esi), %bnd0
+ bndcu -25(%edx), %bnd1
+#endif
movl -25(%esi), %eax
movl -25(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(21bytes):
+#ifdef __CHKP__
+ bndcu -21(%esi), %bnd0
+ bndcu -21(%edx), %bnd1
+#endif
movl -21(%esi), %eax
movl -21(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(17bytes):
+#ifdef __CHKP__
+ bndcu -17(%esi), %bnd0
+ bndcu -17(%edx), %bnd1
+#endif
movl -17(%esi), %eax
movl -17(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(13bytes):
+#ifdef __CHKP__
+ bndcu -13(%esi), %bnd0
+ bndcu -13(%edx), %bnd1
+#endif
movl -13(%esi), %eax
movl -13(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(9bytes):
+#ifdef __CHKP__
+ bndcu -9(%esi), %bnd0
+ bndcu -9(%edx), %bnd1
+#endif
movl -9(%esi), %eax
movl -9(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(5bytes):
+#ifdef __CHKP__
+ bndcu -5(%esi), %bnd0
+ bndcu -5(%edx), %bnd1
+#endif
movl -5(%esi), %eax
movl -5(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(1bytes):
+#ifdef __CHKP__
+ bndcu -1(%esi), %bnd0
+ bndcu -1(%edx), %bnd1
+#endif
movzbl -1(%esi), %eax
cmpb -1(%edx), %al
jne L(set)
@@ -177,41 +253,73 @@ L(1bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(30bytes):
+#ifdef __CHKP__
+ bndcu -30(%esi), %bnd0
+ bndcu -30(%edx), %bnd1
+#endif
movl -30(%esi), %eax
movl -30(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(26bytes):
+#ifdef __CHKP__
+ bndcu -26(%esi), %bnd0
+ bndcu -26(%edx), %bnd1
+#endif
movl -26(%esi), %eax
movl -26(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(22bytes):
+#ifdef __CHKP__
+ bndcu -22(%esi), %bnd0
+ bndcu -22(%edx), %bnd1
+#endif
movl -22(%esi), %eax
movl -22(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(18bytes):
+#ifdef __CHKP__
+ bndcu -18(%esi), %bnd0
+ bndcu -18(%edx), %bnd1
+#endif
movl -18(%esi), %eax
movl -18(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(14bytes):
+#ifdef __CHKP__
+ bndcu -14(%esi), %bnd0
+ bndcu -14(%edx), %bnd1
+#endif
movl -14(%esi), %eax
movl -14(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(10bytes):
+#ifdef __CHKP__
+ bndcu -10(%esi), %bnd0
+ bndcu -10(%edx), %bnd1
+#endif
movl -10(%esi), %eax
movl -10(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(6bytes):
+#ifdef __CHKP__
+ bndcu -6(%esi), %bnd0
+ bndcu -6(%edx), %bnd1
+#endif
movl -6(%esi), %eax
movl -6(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(2bytes):
+#ifdef __CHKP__
+ bndcu -2(%esi), %bnd0
+ bndcu -2(%edx), %bnd1
+#endif
movzwl -2(%esi), %eax
movzwl -2(%edx), %ecx
cmpb %cl, %al
@@ -228,41 +336,73 @@ L(2bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(31bytes):
+#ifdef __CHKP__
+ bndcu -31(%esi), %bnd0
+ bndcu -31(%edx), %bnd1
+#endif
movl -31(%esi), %eax
movl -31(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(27bytes):
+#ifdef __CHKP__
+ bndcu -27(%esi), %bnd0
+ bndcu -27(%edx), %bnd1
+#endif
movl -27(%esi), %eax
movl -27(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(23bytes):
+#ifdef __CHKP__
+ bndcu -23(%esi), %bnd0
+ bndcu -23(%edx), %bnd1
+#endif
movl -23(%esi), %eax
movl -23(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(19bytes):
+#ifdef __CHKP__
+ bndcu -19(%esi), %bnd0
+ bndcu -19(%edx), %bnd1
+#endif
movl -19(%esi), %eax
movl -19(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(15bytes):
+#ifdef __CHKP__
+ bndcu -15(%esi), %bnd0
+ bndcu -15(%edx), %bnd1
+#endif
movl -15(%esi), %eax
movl -15(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(11bytes):
+#ifdef __CHKP__
+ bndcu -11(%esi), %bnd0
+ bndcu -11(%edx), %bnd1
+#endif
movl -11(%esi), %eax
movl -11(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(7bytes):
+#ifdef __CHKP__
+ bndcu -7(%esi), %bnd0
+ bndcu -7(%edx), %bnd1
+#endif
movl -7(%esi), %eax
movl -7(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(3bytes):
+#ifdef __CHKP__
+ bndcu -3(%esi), %bnd0
+ bndcu -3(%edx), %bnd1
+#endif
movzwl -3(%esi), %eax
movzwl -3(%edx), %ecx
cmpb %cl, %al
@@ -286,34 +426,66 @@ L(3bytes):
L(32bytesormore):
subl $32, %ecx
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edx), %bnd1
+#endif
movl (%esi), %eax
cmpl (%edx), %eax
jne L(load_ecx)
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+ bndcu 4(%edx), %bnd1
+#endif
movl 4(%esi), %eax
cmpl 4(%edx), %eax
jne L(load_ecx_4)
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+ bndcu 8(%edx), %bnd1
+#endif
movl 8(%esi), %eax
cmpl 8(%edx), %eax
jne L(load_ecx_8)
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+ bndcu 12(%edx), %bnd1
+#endif
movl 12(%esi), %eax
cmpl 12(%edx), %eax
jne L(load_ecx_12)
+#ifdef __CHKP__
+ bndcu 16(%esi), %bnd0
+ bndcu 16(%edx), %bnd1
+#endif
movl 16(%esi), %eax
cmpl 16(%edx), %eax
jne L(load_ecx_16)
+#ifdef __CHKP__
+ bndcu 20(%esi), %bnd0
+ bndcu 20(%edx), %bnd1
+#endif
movl 20(%esi), %eax
cmpl 20(%edx), %eax
jne L(load_ecx_20)
+#ifdef __CHKP__
+ bndcu 24(%esi), %bnd0
+ bndcu 24(%edx), %bnd1
+#endif
movl 24(%esi), %eax
cmpl 24(%edx), %eax
jne L(load_ecx_24)
+#ifdef __CHKP__
+ bndcu 28(%esi), %bnd0
+ bndcu 28(%edx), %bnd1
+#endif
movl 28(%esi), %eax
cmpl 28(%edx), %eax
jne L(load_ecx_28)
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
index aed79a8aa9..3fd4370b02 100644
--- a/sysdeps/i386/i686/memset.S
+++ b/sysdeps/i386/i686/memset.S
@@ -50,6 +50,11 @@ ENTRY (memset)
cfi_adjust_cfa_offset (4)
movl DEST(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
#if BZERO_P
xorl %eax, %eax /* fill with 0 */
#else
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 8946bfa586..7a4999a808 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -6,9 +6,7 @@ endif
ifeq ($(subdir),string)
gen-as-const-headers += locale-defines.sym
-sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
- memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
- memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
+sysdep_routines += bzero-sse2 memset-sse2 \
memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
@@ -23,7 +21,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
strnlen-sse2 strnlen-c \
strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
strncase_l-c strncase-c strncase_l-ssse3 \
- strcasecmp_l-sse4 strncase_l-sse4
+ strcasecmp_l-sse4 strncase_l-sse4 mpx_memcpy_nobnd \
+ mpx_mempcpy_nobnd mpx_memmove_nobnd
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/Versions b/sysdeps/i386/i686/multiarch/Versions
index 59b185ac8d..7f0cbbcb27 100644
--- a/sysdeps/i386/i686/multiarch/Versions
+++ b/sysdeps/i386/i686/multiarch/Versions
@@ -2,4 +2,11 @@ libc {
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.14 {
+ mpx_memcpy_nobnd;
+ mpx_memmove_nobnd;
+ mpx_mempcpy_nobnd;
+ }
+%endif
}
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/__bcopy.S
index f8e40aff94..f8e40aff94 100644
--- a/sysdeps/i386/i686/multiarch/bcopy.S
+++ b/sysdeps/i386/i686/multiarch/__bcopy.S
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/__memcpy.S
index 9a6cd75669..9a6cd75669 100644
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ b/sysdeps/i386/i686/multiarch/__memcpy.S
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/__memcpy_chk.S
index f66ecfe16b..f66ecfe16b 100644
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/__memcpy_chk.S
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/__memmove.S
index 2d20dd9409..2d20dd9409 100644
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ b/sysdeps/i386/i686/multiarch/__memmove.S
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/__memmove_chk.S
index 9552640d52..9552640d52 100644
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ b/sysdeps/i386/i686/multiarch/__memmove_chk.S
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/__mempcpy.S
index 83bd1f2075..83bd1f2075 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ b/sysdeps/i386/i686/multiarch/__mempcpy.S
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/__mempcpy_chk.S
index 7bd4eb1406..7bd4eb1406 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/__mempcpy_chk.S
diff --git a/sysdeps/i386/i686/multiarch/bcopy.c b/sysdeps/i386/i686/multiarch/bcopy.c
new file mode 100644
index 0000000000..6f5efba461
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy.c
@@ -0,0 +1,8 @@
+#include <stddef.h>
+#include <string.h>
+
+void
+bcopy (const void *src, void *dst, size_t n)
+{
+  memmove (dst, src, n);
+}
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index 2c282bdb72..63f0704912 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -37,11 +37,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = 0;
/* Support sysdeps/i386/i686/multiarch/bcopy.S. */
- IFUNC_IMPL (i, name, bcopy,
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
- __bcopy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
- IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
+// IFUNC_IMPL (i, name, bcopy,
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
+// __bcopy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
+// IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
/* Support sysdeps/i386/i686/multiarch/bzero.S. */
IFUNC_IMPL (i, name, bzero,
@@ -64,21 +64,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */
- IFUNC_IMPL (i, name, __memmove_chk,
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_ia32))
+// IFUNC_IMPL (i, name, __memmove_chk,
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+// __memmove_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove.S. */
- IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
+// IFUNC_IMPL (i, name, memmove,
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3)
+// IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
/* Support sysdeps/i386/i686/multiarch/memrchr.S. */
IFUNC_IMPL (i, name, memrchr,
@@ -274,37 +274,37 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */
- IFUNC_IMPL (i, name, __memcpy_chk,
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __memcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+// __memcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memcpy.S. */
- IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
- __memcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
+// IFUNC_IMPL (i, name, memcpy,
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
+// __memcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */
- IFUNC_IMPL (i, name, __mempcpy_chk,
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __mempcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+// __mempcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy.S. */
- IFUNC_IMPL (i, name, mempcpy,
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
+// IFUNC_IMPL (i, name, mempcpy,
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/strlen.S. */
IFUNC_IMPL (i, name, strlen,
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
index d3641778f3..80be0d9371 100644
--- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
@@ -58,6 +58,12 @@ ENTRY (MEMCHR)
# endif
mov %ecx, %eax
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -79,9 +85,18 @@ ENTRY (MEMCHR)
# ifndef USE_AS_RAWMEMCHR
sub %ecx, %edx
jbe L(return_null_1)
-# endif
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ ret
+# else
+ add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
+# endif
.p2align 4
L(unaligned_no_match_1):
@@ -163,8 +178,15 @@ L(loop_prolog):
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
# endif
pcmpeqb %xmm1, %xmm0
@@ -173,8 +195,15 @@ L(loop_prolog):
jnz L(matches)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqa 16(%edi), %xmm2
# else
+
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
movdqa 16(%edx), %xmm2
# endif
pcmpeqb %xmm1, %xmm2
@@ -183,8 +212,15 @@ L(loop_prolog):
jnz L(matches16)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 32(%edi), %bnd0
+# endif
movdqa 32(%edi), %xmm3
# else
+
+# ifdef __CHKP__
+ bndcu 32(%edx), %bnd0
+# endif
movdqa 32(%edx), %xmm3
# endif
pcmpeqb %xmm1, %xmm3
@@ -193,8 +229,15 @@ L(loop_prolog):
jnz L(matches32)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 48(%edi), %bnd0
+# endif
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu 48(%edx), %bnd0
+# endif
movdqa 48(%edx), %xmm4
# endif
pcmpeqb %xmm1, %xmm4
@@ -277,11 +320,18 @@ L(align64_loop):
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
movdqa 16(%edi), %xmm2
movdqa 32(%edi), %xmm3
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
movdqa 16(%edx), %xmm2
movdqa 32(%edx), %xmm3
@@ -342,9 +392,15 @@ L(align64_loop):
# ifndef USE_AS_RAWMEMCHR
lea 48(%edi, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 48(%edx, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -404,9 +460,15 @@ L(matches0):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea -16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea -16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -415,9 +477,15 @@ L(matches):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
add %edx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -426,9 +494,15 @@ L(matches16):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -437,9 +511,15 @@ L(matches32):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 32(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 32(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
index 2984a374cf..3ccfe66da7 100644
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -91,6 +91,15 @@ ENTRY (MEMCMP)
jbe L(less1bytes)
# endif
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
+
pxor %xmm0, %xmm0
cmp $64, %ecx
ja L(64bytesormore)
@@ -115,6 +124,10 @@ L(less8bytes):
cmpb (%edx), %bl
jne L(nonzero)
+# ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+ bndcu 1(%edx), %bnd1
+# endif
mov 1(%eax), %bl
cmpb 1(%edx), %bl
jne L(nonzero)
@@ -122,6 +135,10 @@ L(less8bytes):
cmp $2, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 2(%eax), %bnd0
+ bndcu 2(%edx), %bnd1
+# endif
mov 2(%eax), %bl
cmpb 2(%edx), %bl
jne L(nonzero)
@@ -129,6 +146,10 @@ L(less8bytes):
cmp $3, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 3(%eax), %bnd0
+ bndcu 3(%edx), %bnd1
+# endif
mov 3(%eax), %bl
cmpb 3(%edx), %bl
jne L(nonzero)
@@ -136,6 +157,10 @@ L(less8bytes):
cmp $4, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+ bndcu 4(%edx), %bnd1
+# endif
mov 4(%eax), %bl
cmpb 4(%edx), %bl
jne L(nonzero)
@@ -143,6 +168,10 @@ L(less8bytes):
cmp $5, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 5(%eax), %bnd0
+ bndcu 5(%edx), %bnd1
+# endif
mov 5(%eax), %bl
cmpb 5(%edx), %bl
jne L(nonzero)
@@ -150,6 +179,10 @@ L(less8bytes):
cmp $6, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 6(%eax), %bnd0
+ bndcu 6(%edx), %bnd1
+# endif
mov 6(%eax), %bl
cmpb 6(%edx), %bl
je L(0bytes)
@@ -198,6 +231,14 @@ L(return0):
.p2align 4
L(less1bytes):
jb L(0bytesend)
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
movzbl (%eax), %eax
movzbl (%edx), %edx
sub %edx, %eax
@@ -221,18 +262,30 @@ L(64bytesormore_loop):
ptest %xmm2, %xmm0
jnc L(find_16diff)
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+ bndcu 16(%edx), %bnd1
+# endif
movdqu 16(%eax), %xmm1
movdqu 16(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_32diff)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+ bndcu 32(%edx), %bnd1
+# endif
movdqu 32(%eax), %xmm1
movdqu 32(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_48diff)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+ bndcu 48(%edx), %bnd1
+# endif
movdqu 48(%eax), %xmm1
movdqu 48(%edx), %xmm2
pxor %xmm1, %xmm2
diff --git a/sysdeps/i386/i686/multiarch/memcpy.c b/sysdeps/i386/i686/multiarch/memcpy.c
new file mode 100644
index 0000000000..824cdcbc28
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy.c
@@ -0,0 +1,40 @@
+#include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+weak_alias (__memcpy, memcpy)
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.c b/sysdeps/i386/i686/multiarch/memcpy_chk.c
new file mode 100644
index 0000000000..1eee86c639
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.c
@@ -0,0 +1 @@
+#include <debug/memcpy_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/memmove.c b/sysdeps/i386/i686/multiarch/memmove.c
new file mode 100644
index 0000000000..9e5ad6dc1a
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove.c
@@ -0,0 +1,76 @@
+#include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ if (s < d)
+ {
+ // backward copying
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ // forward copying
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (s < d)
+ {
+ offset_src = (offset_src + n) & (sizeof(size_t) - 1); /* misalignment of s + n, where the backward copy starts */
+ // backward copying
+ d += n;
+ s += n;
+ while (n-- && offset_src--)
+ *--d = *--s;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *--d1 = *--s1;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ // forward copying
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ }
+ return ret;
+}
+
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.c b/sysdeps/i386/i686/multiarch/memmove_chk.c
new file mode 100644
index 0000000000..bbf53d00d3
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.c b/sysdeps/i386/i686/multiarch/mempcpy.c
new file mode 100644
index 0000000000..6cbdad1f83
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy.c
@@ -0,0 +1,40 @@
+#include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst + n;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __mempcpy)
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.c b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
new file mode 100644
index 0000000000..ba170784c3
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
@@ -0,0 +1 @@
+#include <debug/mempcpy_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
index c5c3e97f0f..75c947c0e3 100644
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
@@ -45,6 +45,12 @@ ENTRY (MEMCHR)
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu -1(%ecx, %edx), %bnd0
+# endif
+
sub $16, %edx
jbe L(length_less16)
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
index bcea296a9a..ce112b1f66 100644
--- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -90,6 +90,7 @@ ENTRY (__memset_sse2_rep)
ENTRANCE
movl LEN(%esp), %ecx
+
#ifdef USE_AS_BZERO
xor %eax, %eax
#else
@@ -101,6 +102,11 @@ ENTRY (__memset_sse2_rep)
or %edx, %eax
#endif
movl DEST(%esp), %edx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
cmp $32, %ecx
jae L(32bytesormore)
diff --git a/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
new file mode 100644
index 0000000000..b7f4e0e2fd
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
@@ -0,0 +1,1803 @@
+/* memcpy with SSSE3 and REP string (MPX variant that does not copy bounds).
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY mpx_memcpy_nobnd
+#endif
+
+#ifdef USE_AS_BCOPY
+# define SRC PARMS
+# define DEST SRC+4
+# define LEN DEST+4
+#else
+# define DEST PARMS
+# define SRC DEST+4
+# define LEN SRC+4
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef SHARED
+# define PARMS 8 /* Preserve EBX. */
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register that contains
+ the index into the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
+ addl $(TABLE - .), %ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+#else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+ absolute offsets. INDEX is a register that contains the index into
+ the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (MEMCPY)
+ ENTRANCE
+ movl LEN(%esp), %ecx
+ movl SRC(%esp), %eax
+ movl DEST(%esp), %edx
+
+#ifdef __CHKP__
+ bndldx SRC(%esp,%eax,1), %bnd1
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu -1(%eax, %ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
+
+#ifdef USE_AS_MEMMOVE
+ cmp %eax, %edx
+ jb L(copy_forward)
+ je L(fwd_write_0bytes)
+ cmp $48, %ecx
+ jb L(bk_write_less48bytes)
+ add %ecx, %eax
+ cmp %eax, %edx
+ movl SRC(%esp), %eax
+ jb L(copy_backward)
+
+L(copy_forward):
+#endif
+ cmp $48, %ecx
+ jae L(48bytesormore)
+
+L(fwd_write_less32bytes):
+#ifndef USE_AS_MEMMOVE
+ cmp %dl, %al
+ jb L(bk_write)
+#endif
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+#ifndef USE_AS_MEMMOVE
+L(bk_write):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+#endif
+
+ ALIGN (4)
+/* ECX > 32 and EDX is 4 byte aligned. */
+L(48bytesormore):
+ movdqu (%eax), %xmm0
+ PUSH (%edi)
+ movl %edx, %edi
+ and $-16, %edx
+ PUSH (%esi)
+ cfi_remember_state
+ add $16, %edx
+ movl %edi, %esi
+ sub %edx, %edi
+ add %edi, %ecx
+ sub %edi, %eax
+
+#ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_shared_cache_size_half, %ecx
+# endif
+#endif
+
+ mov %eax, %edi
+ jae L(large_page)
+ and $0xf, %edi
+ jz L(shl_0)
+
+ BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+
+ ALIGN (4)
+L(shl_0):
+ movdqu %xmm0, (%esi)
+ xor %edi, %edi
+ cmp $127, %ecx
+ ja L(shl_0_gobble)
+ lea -32(%ecx), %ecx
+L(shl_0_loop):
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+L(shl_0_end):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ add %edi, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+L(shl_0_gobble):
+
+#ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi
+# else
+ mov __x86_data_cache_size_half, %edi
+# endif
+#endif
+ mov %edi, %esi
+ shr $3, %esi
+ sub %esi, %edi
+ cmp %edi, %ecx
+ jae L(shl_0_gobble_mem_start)
+ sub $128, %ecx
+ ALIGN (4)
+L(shl_0_gobble_cache_loop):
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_cache_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_cache_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_cache_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_cache_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_cache_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_cache_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_cache_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_0_gobble_mem_start):
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+ sub $128, %ecx
+L(shl_0_gobble_mem_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ prefetchnta 0x1c0(%edx)
+ prefetchnta 0x280(%edx)
+
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $0x80, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_mem_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_mem_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_mem_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_mem_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_mem_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_mem_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_mem_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_1):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $1, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_1_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_1_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_1_loop)
+
+L(shl_1_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 1(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_2):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $2, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_2_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_2_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_2_loop)
+
+L(shl_2_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 2(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_3):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $3, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_3_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_3_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_3_loop)
+
+L(shl_3_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 3(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_4):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $4, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_4_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_4_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_4_loop)
+
+L(shl_4_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 4(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_5):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $5, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_5_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_5_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_5_loop)
+
+L(shl_5_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 5(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_6):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $6, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_6_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_6_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_6_loop)
+
+L(shl_6_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 6(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_7):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $7, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_7_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_7_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_7_loop)
+
+L(shl_7_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 7(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_8):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $8, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_8_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_8_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_8_loop)
+
+L(shl_8_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 8(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_9):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $9, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_9_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_9_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_9_loop)
+
+L(shl_9_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 9(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_10):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $10, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_10_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_10_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_10_loop)
+
+L(shl_10_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 10(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_11):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $11, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_11_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_11_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_11_loop)
+
+L(shl_11_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 11(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_12):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $12, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_12_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_12_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_12_loop)
+
+L(shl_12_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 12(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_13):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $13, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_13_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_13_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_13_loop)
+
+L(shl_13_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 13(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_14):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $14, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_14_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_14_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_14_loop)
+
+L(shl_14_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 14(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_15):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $15, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_15_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_15_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_15_loop)
+
+L(shl_15_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 15(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+
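The shl_1 through shl_15 blocks are one loop body per possible source
misalignment: the source pointer is rounded down to a 16-byte boundary,
whole aligned xmm words are loaded, and neighbouring words are stitched
back together with palignr by the fixed byte count the label encodes.
A scalar C sketch of the same stitching (illustrative only; it uses
8-byte words instead of xmm registers and, unlike the assembly, may
read up to a word past the source tail):

    #include <stdint.h>
    #include <string.h>

    /* Copy LEN bytes from a misaligned SRC using only aligned 8-byte
       loads, combining neighbouring words the way palignr combines
       xmm words.  Little-endian sketch; shift == 0 (the shl_0 case)
       simply falls through to the byte loop here.  */
    void
    copy_stitched (char *dst, const char *src, size_t len)
    {
      size_t shift = (uintptr_t) src & 7;      /* cf. "sub $N, %eax" */
      const uint64_t *s = (const uint64_t *) (src - shift);
      uint64_t prev = s[0];
      size_t i = 0;

      if (shift != 0)
        for (; i + 8 <= len; i += 8)
          {
            uint64_t next = s[i / 8 + 1];      /* aligned load */
            /* High bytes of PREV joined to low bytes of NEXT.  */
            uint64_t w = (prev >> (8 * shift))
                         | (next << (8 * (8 - shift)));
            memcpy (dst + i, &w, 8);
            prev = next;
          }
      for (; i < len; i++)                     /* jump-table tail */
        dst[i] = src[i];
    }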
+ ALIGN (4)
+L(fwd_write_44bytes):
+ movl -44(%eax), %ecx
+ movl %ecx, -44(%edx)
+L(fwd_write_40bytes):
+ movl -40(%eax), %ecx
+ movl %ecx, -40(%edx)
+L(fwd_write_36bytes):
+ movl -36(%eax), %ecx
+ movl %ecx, -36(%edx)
+L(fwd_write_32bytes):
+ movl -32(%eax), %ecx
+ movl %ecx, -32(%edx)
+L(fwd_write_28bytes):
+ movl -28(%eax), %ecx
+ movl %ecx, -28(%edx)
+L(fwd_write_24bytes):
+ movl -24(%eax), %ecx
+ movl %ecx, -24(%edx)
+L(fwd_write_20bytes):
+ movl -20(%eax), %ecx
+ movl %ecx, -20(%edx)
+L(fwd_write_16bytes):
+ movl -16(%eax), %ecx
+ movl %ecx, -16(%edx)
+L(fwd_write_12bytes):
+ movl -12(%eax), %ecx
+ movl %ecx, -12(%edx)
+L(fwd_write_8bytes):
+ movl -8(%eax), %ecx
+ movl %ecx, -8(%edx)
+L(fwd_write_4bytes):
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+L(fwd_write_0bytes):
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_5bytes):
+ movl -5(%eax), %ecx
+ movl -4(%eax), %eax
+ movl %ecx, -5(%edx)
+ movl %eax, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_45bytes):
+ movl -45(%eax), %ecx
+ movl %ecx, -45(%edx)
+L(fwd_write_41bytes):
+ movl -41(%eax), %ecx
+ movl %ecx, -41(%edx)
+L(fwd_write_37bytes):
+ movl -37(%eax), %ecx
+ movl %ecx, -37(%edx)
+L(fwd_write_33bytes):
+ movl -33(%eax), %ecx
+ movl %ecx, -33(%edx)
+L(fwd_write_29bytes):
+ movl -29(%eax), %ecx
+ movl %ecx, -29(%edx)
+L(fwd_write_25bytes):
+ movl -25(%eax), %ecx
+ movl %ecx, -25(%edx)
+L(fwd_write_21bytes):
+ movl -21(%eax), %ecx
+ movl %ecx, -21(%edx)
+L(fwd_write_17bytes):
+ movl -17(%eax), %ecx
+ movl %ecx, -17(%edx)
+L(fwd_write_13bytes):
+ movl -13(%eax), %ecx
+ movl %ecx, -13(%edx)
+L(fwd_write_9bytes):
+ movl -9(%eax), %ecx
+ movl %ecx, -9(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+L(fwd_write_1bytes):
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_46bytes):
+ movl -46(%eax), %ecx
+ movl %ecx, -46(%edx)
+L(fwd_write_42bytes):
+ movl -42(%eax), %ecx
+ movl %ecx, -42(%edx)
+L(fwd_write_38bytes):
+ movl -38(%eax), %ecx
+ movl %ecx, -38(%edx)
+L(fwd_write_34bytes):
+ movl -34(%eax), %ecx
+ movl %ecx, -34(%edx)
+L(fwd_write_30bytes):
+ movl -30(%eax), %ecx
+ movl %ecx, -30(%edx)
+L(fwd_write_26bytes):
+ movl -26(%eax), %ecx
+ movl %ecx, -26(%edx)
+L(fwd_write_22bytes):
+ movl -22(%eax), %ecx
+ movl %ecx, -22(%edx)
+L(fwd_write_18bytes):
+ movl -18(%eax), %ecx
+ movl %ecx, -18(%edx)
+L(fwd_write_14bytes):
+ movl -14(%eax), %ecx
+ movl %ecx, -14(%edx)
+L(fwd_write_10bytes):
+ movl -10(%eax), %ecx
+ movl %ecx, -10(%edx)
+L(fwd_write_6bytes):
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+L(fwd_write_2bytes):
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_47bytes):
+ movl -47(%eax), %ecx
+ movl %ecx, -47(%edx)
+L(fwd_write_43bytes):
+ movl -43(%eax), %ecx
+ movl %ecx, -43(%edx)
+L(fwd_write_39bytes):
+ movl -39(%eax), %ecx
+ movl %ecx, -39(%edx)
+L(fwd_write_35bytes):
+ movl -35(%eax), %ecx
+ movl %ecx, -35(%edx)
+L(fwd_write_31bytes):
+ movl -31(%eax), %ecx
+ movl %ecx, -31(%edx)
+L(fwd_write_27bytes):
+ movl -27(%eax), %ecx
+ movl %ecx, -27(%edx)
+L(fwd_write_23bytes):
+ movl -23(%eax), %ecx
+ movl %ecx, -23(%edx)
+L(fwd_write_19bytes):
+ movl -19(%eax), %ecx
+ movl %ecx, -19(%edx)
+L(fwd_write_15bytes):
+ movl -15(%eax), %ecx
+ movl %ecx, -15(%edx)
+L(fwd_write_11bytes):
+ movl -11(%eax), %ecx
+ movl %ecx, -11(%edx)
+L(fwd_write_7bytes):
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+L(fwd_write_3bytes):
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN_END
+
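Each fwd_write_Nbytes ladder above finishes a copy by falling through
4-byte moves addressed backwards from the end of the buffer, so a
single indirect jump through table_48bytes_fwd retires any 0-47 byte
residue without a loop.  The same schedule in C (a sketch, not the
glibc code):

    #include <stdint.h>
    #include <string.h>

    /* Copy the last N (< 48) bytes, addressing back from the end like
       the fwd_write_* labels: dwords first, then a word, then a final
       byte.  */
    static void
    copy_tail (char *dst_end, const char *src_end, unsigned n)
    {
      while (n >= 4)
        {
          uint32_t w;
          memcpy (&w, src_end - n, 4);   /* movl -N(%eax), %ecx */
          memcpy (dst_end - n, &w, 4);   /* movl %ecx, -N(%edx) */
          n -= 4;
        }
      if (n >= 2)
        {
          uint16_t h;
          memcpy (&h, src_end - n, 2);   /* movzwl / movw pair */
          memcpy (dst_end - n, &h, 2);
          n -= 2;
        }
      if (n)
        dst_end[-1] = src_end[-1];       /* movzbl / movb pair */
    }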
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(large_page):
+ movdqu (%eax), %xmm1
+ movdqu %xmm0, (%esi)
+ movntdq %xmm1, (%edx)
+ add $0x10, %eax
+ add $0x10, %edx
+ sub $0x10, %ecx
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+L(large_page_loop_init):
+ POP (%esi)
+ sub $0x80, %ecx
+ POP (%edi)
+L(large_page_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ movdqu 0x40(%eax), %xmm4
+ movdqu 0x50(%eax), %xmm5
+ movdqu 0x60(%eax), %xmm6
+ movdqu 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ lfence
+ sub $0x80, %ecx
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ movntdq %xmm4, 0x40(%edx)
+ movntdq %xmm5, 0x50(%edx)
+ movntdq %xmm6, 0x60(%edx)
+ movntdq %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+ jae L(large_page_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(large_page_less_64bytes)
+
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ lea 0x40(%eax), %eax
+
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ lea 0x40(%edx), %edx
+ sub $0x40, %ecx
+L(large_page_less_64bytes):
+ cmp $32, %ecx
+ jb L(large_page_less_32bytes)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ lea 0x20(%eax), %eax
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ lea 0x20(%edx), %edx
+ sub $0x20, %ecx
+L(large_page_less_32bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ sfence
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
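The large_page path bypasses the cache once the copy is too big to
benefit from it: movntdq stores go straight to memory, prefetchnta
pulls the source in far ahead, and sfence orders the streaming stores
before the tail dispatch.  An intrinsics sketch of the core loop
(assumes, as the assembly arranges here, a 16-byte-aligned
destination):

    #include <emmintrin.h>   /* _mm_loadu_si128, _mm_stream_si128 */
    #include <xmmintrin.h>   /* _mm_prefetch, _mm_sfence */
    #include <stddef.h>

    void
    copy_nontemporal (char *dst, const char *src, size_t len)
    {
      while (len >= 64)
        {
          /* Prefetches never fault, so running past the end is OK. */
          _mm_prefetch (src + 0x1c0, _MM_HINT_NTA);  /* prefetchnta */
          __m128i a = _mm_loadu_si128 ((const __m128i *) (src + 0));
          __m128i b = _mm_loadu_si128 ((const __m128i *) (src + 16));
          __m128i c = _mm_loadu_si128 ((const __m128i *) (src + 32));
          __m128i d = _mm_loadu_si128 ((const __m128i *) (src + 48));
          _mm_stream_si128 ((__m128i *) (dst + 0), a);   /* movntdq */
          _mm_stream_si128 ((__m128i *) (dst + 16), b);
          _mm_stream_si128 ((__m128i *) (dst + 32), c);
          _mm_stream_si128 ((__m128i *) (dst + 48), d);
          src += 64;
          dst += 64;
          len -= 64;
        }
      _mm_sfence ();            /* make the streaming stores visible */
      while (len--)             /* byte tail */
        *dst++ = *src++;
    }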
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(copy_page_by_rep):
+ mov %eax, %esi
+ mov %edx, %edi
+ mov %ecx, %edx
+ shr $2, %ecx
+ and $3, %edx
+ rep movsl
+ jz L(copy_page_by_rep_exit)
+ cmp $2, %edx
+ jb L(copy_page_by_rep_left_1)
+ movzwl (%esi), %eax
+ movw %ax, (%edi)
+ add $2, %esi
+ add $2, %edi
+ sub $2, %edx
+ jz L(copy_page_by_rep_exit)
+L(copy_page_by_rep_left_1):
+ movzbl (%esi), %eax
+ movb %al, (%edi)
+L(copy_page_by_rep_exit):
+ POP (%esi)
+ POP (%edi)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
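copy_page_by_rep is the fallback chosen when source and destination
share the same low address bits: the count is split into dwords plus a
0-3 byte remainder (shr $2 / and $3) and rep movsl moves the bulk.
The same split in C (illustrative):

    #include <stddef.h>
    #include <string.h>

    void
    copy_by_words (char *dst, const char *src, size_t len)
    {
      size_t words = len >> 2;              /* shr $2, %ecx */
      size_t rem = len & 3;                 /* and $3, %edx */
      for (; words--; src += 4, dst += 4)   /* rep movsl */
        memcpy (dst, src, 4);
      if (rem >= 2)                         /* movzwl / movw pair */
        {
          memcpy (dst, src, 2);
          dst += 2;
          src += 2;
          rem -= 2;
        }
      if (rem)                              /* movzbl / movb pair */
        *dst = *src;
    }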
+ ALIGN (4)
+L(bk_write_44bytes):
+ movl 40(%eax), %ecx
+ movl %ecx, 40(%edx)
+L(bk_write_40bytes):
+ movl 36(%eax), %ecx
+ movl %ecx, 36(%edx)
+L(bk_write_36bytes):
+ movl 32(%eax), %ecx
+ movl %ecx, 32(%edx)
+L(bk_write_32bytes):
+ movl 28(%eax), %ecx
+ movl %ecx, 28(%edx)
+L(bk_write_28bytes):
+ movl 24(%eax), %ecx
+ movl %ecx, 24(%edx)
+L(bk_write_24bytes):
+ movl 20(%eax), %ecx
+ movl %ecx, 20(%edx)
+L(bk_write_20bytes):
+ movl 16(%eax), %ecx
+ movl %ecx, 16(%edx)
+L(bk_write_16bytes):
+ movl 12(%eax), %ecx
+ movl %ecx, 12(%edx)
+L(bk_write_12bytes):
+ movl 8(%eax), %ecx
+ movl %ecx, 8(%edx)
+L(bk_write_8bytes):
+ movl 4(%eax), %ecx
+ movl %ecx, 4(%edx)
+L(bk_write_4bytes):
+ movl (%eax), %ecx
+ movl %ecx, (%edx)
+L(bk_write_0bytes):
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_45bytes):
+ movl 41(%eax), %ecx
+ movl %ecx, 41(%edx)
+L(bk_write_41bytes):
+ movl 37(%eax), %ecx
+ movl %ecx, 37(%edx)
+L(bk_write_37bytes):
+ movl 33(%eax), %ecx
+ movl %ecx, 33(%edx)
+L(bk_write_33bytes):
+ movl 29(%eax), %ecx
+ movl %ecx, 29(%edx)
+L(bk_write_29bytes):
+ movl 25(%eax), %ecx
+ movl %ecx, 25(%edx)
+L(bk_write_25bytes):
+ movl 21(%eax), %ecx
+ movl %ecx, 21(%edx)
+L(bk_write_21bytes):
+ movl 17(%eax), %ecx
+ movl %ecx, 17(%edx)
+L(bk_write_17bytes):
+ movl 13(%eax), %ecx
+ movl %ecx, 13(%edx)
+L(bk_write_13bytes):
+ movl 9(%eax), %ecx
+ movl %ecx, 9(%edx)
+L(bk_write_9bytes):
+ movl 5(%eax), %ecx
+ movl %ecx, 5(%edx)
+L(bk_write_5bytes):
+ movl 1(%eax), %ecx
+ movl %ecx, 1(%edx)
+L(bk_write_1bytes):
+ movzbl (%eax), %ecx
+ movb %cl, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_46bytes):
+ movl 42(%eax), %ecx
+ movl %ecx, 42(%edx)
+L(bk_write_42bytes):
+ movl 38(%eax), %ecx
+ movl %ecx, 38(%edx)
+L(bk_write_38bytes):
+ movl 34(%eax), %ecx
+ movl %ecx, 34(%edx)
+L(bk_write_34bytes):
+ movl 30(%eax), %ecx
+ movl %ecx, 30(%edx)
+L(bk_write_30bytes):
+ movl 26(%eax), %ecx
+ movl %ecx, 26(%edx)
+L(bk_write_26bytes):
+ movl 22(%eax), %ecx
+ movl %ecx, 22(%edx)
+L(bk_write_22bytes):
+ movl 18(%eax), %ecx
+ movl %ecx, 18(%edx)
+L(bk_write_18bytes):
+ movl 14(%eax), %ecx
+ movl %ecx, 14(%edx)
+L(bk_write_14bytes):
+ movl 10(%eax), %ecx
+ movl %ecx, 10(%edx)
+L(bk_write_10bytes):
+ movl 6(%eax), %ecx
+ movl %ecx, 6(%edx)
+L(bk_write_6bytes):
+ movl 2(%eax), %ecx
+ movl %ecx, 2(%edx)
+L(bk_write_2bytes):
+ movzwl (%eax), %ecx
+ movw %cx, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_47bytes):
+ movl 43(%eax), %ecx
+ movl %ecx, 43(%edx)
+L(bk_write_43bytes):
+ movl 39(%eax), %ecx
+ movl %ecx, 39(%edx)
+L(bk_write_39bytes):
+ movl 35(%eax), %ecx
+ movl %ecx, 35(%edx)
+L(bk_write_35bytes):
+ movl 31(%eax), %ecx
+ movl %ecx, 31(%edx)
+L(bk_write_31bytes):
+ movl 27(%eax), %ecx
+ movl %ecx, 27(%edx)
+L(bk_write_27bytes):
+ movl 23(%eax), %ecx
+ movl %ecx, 23(%edx)
+L(bk_write_23bytes):
+ movl 19(%eax), %ecx
+ movl %ecx, 19(%edx)
+L(bk_write_19bytes):
+ movl 15(%eax), %ecx
+ movl %ecx, 15(%edx)
+L(bk_write_15bytes):
+ movl 11(%eax), %ecx
+ movl %ecx, 11(%edx)
+L(bk_write_11bytes):
+ movl 7(%eax), %ecx
+ movl %ecx, 7(%edx)
+L(bk_write_7bytes):
+ movl 3(%eax), %ecx
+ movl %ecx, 3(%edx)
+L(bk_write_3bytes):
+ movzwl 1(%eax), %ecx
+ movw %cx, 1(%edx)
+ movzbl (%eax), %eax
+ movb %al, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN_END
+
+
+ .pushsection .rodata.ssse3,"a",@progbits
+ ALIGN (2)
+L(table_48bytes_fwd):
+ .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
+
+ ALIGN (2)
+L(shl_table):
+ .int JMPTBL (L(shl_0), L(shl_table))
+ .int JMPTBL (L(shl_1), L(shl_table))
+ .int JMPTBL (L(shl_2), L(shl_table))
+ .int JMPTBL (L(shl_3), L(shl_table))
+ .int JMPTBL (L(shl_4), L(shl_table))
+ .int JMPTBL (L(shl_5), L(shl_table))
+ .int JMPTBL (L(shl_6), L(shl_table))
+ .int JMPTBL (L(shl_7), L(shl_table))
+ .int JMPTBL (L(shl_8), L(shl_table))
+ .int JMPTBL (L(shl_9), L(shl_table))
+ .int JMPTBL (L(shl_10), L(shl_table))
+ .int JMPTBL (L(shl_11), L(shl_table))
+ .int JMPTBL (L(shl_12), L(shl_table))
+ .int JMPTBL (L(shl_13), L(shl_table))
+ .int JMPTBL (L(shl_14), L(shl_table))
+ .int JMPTBL (L(shl_15), L(shl_table))
+
+ ALIGN (2)
+L(table_48_bytes_bwd):
+ .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
+
+ .popsection
+
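Each table entry above stores a 32-bit offset, label minus table base
(the JMPTBL macro), rather than an absolute address, so the read-only
tables need no load-time relocations and work in position-independent
code; BRANCH_TO_JMPTBL_ENTRY adds the base back before jumping.  In
GNU C the same trick looks roughly like this (labels-as-values
extension; illustrative only):

    /* Dispatch on N through offsets relative to a base label, the way
       BRANCH_TO_JMPTBL_ENTRY (table, N, 4) does.  GNU C only.  */
    int
    tail_size_class (unsigned n)
    {
      const void *base = &&tail0;
      const long off[3] = { &&tail0 - &&tail0,  /* .int JMPTBL (...) */
                            &&tail1 - &&tail0,
                            &&tail2 - &&tail0 };
      goto *(base + off[n % 3]);
     tail0: return 0;
     tail1: return 1;
     tail2: return 2;
    }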
+#ifdef USE_AS_MEMMOVE
+ ALIGN (4)
+L(copy_backward):
+ PUSH (%esi)
+ movl %eax, %esi
+ add %ecx, %edx
+ add %ecx, %esi
+ testl $0x3, %edx
+ jnz L(bk_align)
+
+L(bk_aligned_4):
+ cmp $64, %ecx
+ jae L(bk_write_more64bytes)
+
+L(bk_write_64bytesless):
+ cmp $32, %ecx
+ jb L(bk_write_less32bytes)
+
+L(bk_write_more32bytes):
+ /* Copy 32 bytes at a time. */
+ sub $32, %ecx
+ movl -4(%esi), %eax
+ movl %eax, -4(%edx)
+ movl -8(%esi), %eax
+ movl %eax, -8(%edx)
+ movl -12(%esi), %eax
+ movl %eax, -12(%edx)
+ movl -16(%esi), %eax
+ movl %eax, -16(%edx)
+ movl -20(%esi), %eax
+ movl %eax, -20(%edx)
+ movl -24(%esi), %eax
+ movl %eax, -24(%edx)
+ movl -28(%esi), %eax
+ movl %eax, -28(%edx)
+ movl -32(%esi), %eax
+ movl %eax, -32(%edx)
+ sub $32, %edx
+ sub $32, %esi
+
+L(bk_write_less32bytes):
+ movl %esi, %eax
+ sub %ecx, %edx
+ sub %ecx, %eax
+ POP (%esi)
+L(bk_write_less48bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+
+ CFI_PUSH (%esi)
+ ALIGN (4)
+L(bk_align):
+ cmp $8, %ecx
+ jbe L(bk_write_less32bytes)
+ testl $1, %edx
+ /* We get here only if (EDX & 3) != 0, so if (EDX & 1) == 0,
+    then (EDX & 2) must be != 0.  */
+ jz L(bk_got2)
+ sub $1, %esi
+ sub $1, %ecx
+ sub $1, %edx
+ movzbl (%esi), %eax
+ movb %al, (%edx)
+
+ testl $2, %edx
+ jz L(bk_aligned_4)
+
+L(bk_got2):
+ sub $2, %esi
+ sub $2, %ecx
+ sub $2, %edx
+ movzwl (%esi), %eax
+ movw %ax, (%edx)
+ jmp L(bk_aligned_4)
+
+ ALIGN (4)
+L(bk_write_more64bytes):
+ /* Check 16-byte alignment of the destination end. */
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+/* EDX is 4-byte aligned, but not 16-byte aligned. */
+L(bk_ssse3_align):
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+L(bk_ssse3_cpy_pre):
+ cmp $64, %ecx
+ jb L(bk_write_more32bytes)
+
+L(bk_ssse3_cpy):
+ sub $64, %esi
+ sub $64, %ecx
+ sub $64, %edx
+ movdqu 0x30(%esi), %xmm3
+ movdqa %xmm3, 0x30(%edx)
+ movdqu 0x20(%esi), %xmm2
+ movdqa %xmm2, 0x20(%edx)
+ movdqu 0x10(%esi), %xmm1
+ movdqa %xmm1, 0x10(%edx)
+ movdqu (%esi), %xmm0
+ movdqa %xmm0, (%edx)
+ cmp $64, %ecx
+ jae L(bk_ssse3_cpy)
+ jmp L(bk_write_64bytesless)
+
+#endif
+
+END (MEMCPY)
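When built with USE_AS_MEMMOVE, the copy_backward section runs the
copy from the end whenever the destination lands inside the source
range, so overlapping bytes are consumed before they are overwritten.
The direction test reduces to one unsigned comparison, sketched here
in C:

    #include <stddef.h>
    #include <stdint.h>

    void *
    my_memmove (void *dstv, const void *srcv, size_t n)
    {
      char *dst = dstv;
      const char *src = srcv;
      /* Unsigned wrap-around makes this one test: forward copying is
         safe unless DST points into [SRC, SRC + N).  */
      if ((uintptr_t) dst - (uintptr_t) src >= n)
        for (size_t i = 0; i < n; i++)   /* forward */
          dst[i] = src[i];
      else
        while (n--)                      /* backward */
          dst[n] = src[n];
      return dstv;
    }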
diff --git a/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
new file mode 100644
index 0000000000..caaa89aea8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
@@ -0,0 +1,3 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY mpx_memmove_nobnd
+#include "mpx_memcpy_nobnd.S"
diff --git a/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S
new file mode 100644
index 0000000000..4b0af499e4
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S
@@ -0,0 +1,3 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY mpx_mempcpy_nobnd
+#include "mpx_memcpy_nobnd.S"
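These two 3-line files are the whole implementation of
mpx_memmove_nobnd and mpx_mempcpy_nobnd: each re-assembles
mpx_memcpy_nobnd.S under another name, with USE_AS_MEMMOVE enabling
the backward path and USE_AS_MEMPCPY changing the return value.  The
same single-body pattern in C (hypothetical file and function names):

    /* template.c -- one body, several entry points */
    #ifndef FUNC
    # define FUNC my_memcpy
    #endif

    void *
    FUNC (void *dstv, const void *srcv, unsigned long n)
    {
      char *d = dstv;
      const char *s = srcv;
      while (n--)
        *d++ = *s++;
    #ifdef USE_AS_MEMPCPY
      return d;      /* mempcpy: pointer past the last byte written */
    #else
      return dstv;   /* memcpy: the destination */
    #endif
    }

A my_mempcpy.c would then be just "#define FUNC my_mempcpy",
"#define USE_AS_MEMPCPY" and "#include \"template.c\"", mirroring the
.S wrappers above.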
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
index 62d60cdb78..b1d39ae5cb 100644
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strcat-sse2.S
@@ -95,10 +95,20 @@ ENTRY (STRCAT)
test %ebx, %ebx
jz L(ExitZero)
# endif
+# ifdef __CHKP__
+ bndldx STR1(%esp,%eax,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+# endif
cmpb $0, (%esi)
mov %esi, %ecx
mov %eax, %edx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
and $63, %ecx
and $63, %edx
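The __CHKP__ blocks added throughout this file are Intel MPX bounds
checks: bndldx reloads the bounds recorded for a pointer from the
bounds tables, bndcl traps if an address is below the lower bound, and
bndcu traps if it is above the upper bound (which designates the last
valid byte).  Written as plain C, each check pair asserts roughly the
following (conceptual sketch; real MPX keeps the bounds in %bnd0-%bnd3
and raises #BR in hardware):

    struct bounds
    {
      const char *lo;   /* lower bound */
      const char *hi;   /* upper bound: last addressable byte */
    };

    /* What a "bndcl addr, %bndN" / "bndcu addr, %bndN" pair asserts. */
    static inline void
    bnd_check (struct bounds b, const char *addr)
    {
      if (addr < b.lo || addr > b.hi)
        __builtin_trap ();   /* stands in for the #BR exception */
    }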
@@ -113,6 +123,9 @@ ENTRY (STRCAT)
movdqu (%eax), %xmm1
movdqu (%esi), %xmm5
pcmpeqb %xmm1, %xmm0
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %ecx
pcmpeqb %xmm5, %xmm4
@@ -132,6 +145,9 @@ L(alignment_prolog):
and $-16, %eax
pcmpeqb (%eax), %xmm0
movdqu (%esi), %xmm5
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %edx
pcmpeqb %xmm5, %xmm4
@@ -148,21 +164,33 @@ L(loop_prolog):
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -212,6 +240,9 @@ L(StartStrcpyPart):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm5, (%eax)
pmovmskb %xmm7, %edx
# ifdef USE_AS_STRNCAT
@@ -250,21 +281,33 @@ L(StrlenCore7_1):
.p2align 4
L(align16_loop_1):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16_1)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32_1)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48_1)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -323,6 +366,9 @@ L(StartStrcpyPart_1):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STRNCAT
@@ -341,6 +387,9 @@ L(Unalign16Both):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -352,6 +401,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
L(Unalign16BothBigN):
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -364,6 +417,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%eax, %ecx)
pcmpeqb %xmm4, %xmm0
@@ -376,6 +433,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%eax, %ecx)
pcmpeqb %xmm1, %xmm0
@@ -388,6 +449,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
@@ -400,6 +465,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -412,6 +481,9 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm3, (%eax, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
@@ -421,6 +493,9 @@ L(Unalign16BothBigN):
# ifdef USE_AS_STRNCAT
lea 128(%ebx, %edx), %ebx
# endif
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
@@ -443,6 +518,10 @@ L(Unalign16BothBigN):
L(Unaligned64Loop_start):
add $64, %eax
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu -1(%eax), %bnd0
+# endif
movdqu %xmm4, -64(%eax)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
@@ -485,11 +564,18 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
movdqu %xmm6, 32(%eax)
add $48, %esi
add $48, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
@@ -501,12 +587,18 @@ L(BigN):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm1, (%eax)
sub %ecx, %eax
sub $48, %ebx
@@ -515,6 +607,9 @@ L(BigN):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -532,12 +627,20 @@ L(CopyFrom1To16Bytes):
add %ecx, %eax
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesTail):
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -546,6 +649,10 @@ L(CopyFrom1To32Bytes1):
add $16, %eax
L(CopyFrom1To16BytesTail1):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -554,34 +661,60 @@ L(CopyFrom1To32Bytes):
add %ecx, %esi
add $16, %edx
sub %ecx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %esi
add $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
add $32, %esi
add $32, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
.p2align 4
L(CopyFrom1To16BytesExit):
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
/* Case2 */
@@ -594,6 +727,10 @@ L(CopyFrom1To16BytesCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -605,6 +742,10 @@ L(CopyFrom1To32BytesCase2):
sub %ecx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTailCase2):
@@ -613,12 +754,20 @@ L(CopyFrom1To16BytesTailCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTail1Case2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Case2 or Case3, Case3 */
@@ -631,6 +780,10 @@ L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %ecx, %eax
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -639,6 +792,10 @@ L(CopyFrom1To32BytesCase2OrCase3):
jnz L(CopyFrom1To32BytesCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -647,6 +804,10 @@ L(CopyFrom1To16BytesTailCase2OrCase3):
jnz L(CopyFrom1To16BytesTailCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -657,6 +818,10 @@ L(CopyFrom1To32Bytes1Case2OrCase3):
L(CopyFrom1To16BytesTail1Case2OrCase3):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1Case2)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
# endif
@@ -1110,15 +1275,27 @@ L(Unaligned64LeaveCase3):
and $-16, %ecx
add $48, %ebx
jl L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 63(%eax), %bnd0
+# endif
movdqu %xmm7, 48(%eax)
xor %bh, %bh
movb %bh, 64(%eax)
@@ -1137,6 +1314,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %ecx
sub $16, %ebx
@@ -1146,6 +1326,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
add $16, %ecx
sub $16, %ebx
@@ -1155,6 +1338,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
lea 16(%eax, %ecx), %eax
lea 16(%esi, %ecx), %esi
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
index 938d74d318..1e59581f3e 100644
--- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
@@ -46,6 +46,12 @@ ENTRY (__strchr_sse2_bsf)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
@@ -81,6 +87,9 @@ ENTRY (__strchr_sse2_bsf)
L(unaligned_match):
add %edi, %eax
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
.p2align 4
@@ -94,6 +103,9 @@ L(unaligned_no_match):
.p2align 4
/* Loop start on aligned string. */
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -103,6 +115,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -112,6 +127,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -121,6 +139,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -146,6 +167,9 @@ L(matches):
L(match):
sub $16, %edi
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
/* Return NULL. */
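The loop being instrumented here scans 16 bytes per step: pcmpeqb
compares each aligned chunk against the target byte (and, in the full
routine, against zero for the terminator), pmovmskb condenses the xmm
mask into 16 bits, and bsf picks the first hit; the added bndcu before
each movdqa keeps every 16-byte probe inside the string's bounds.  A
simplified intrinsics sketch of the scan (assumes P is 16-byte aligned
and the byte is known to occur):

    #include <emmintrin.h>

    const char *
    find_byte16 (const char *p, unsigned char c)
    {
      __m128i needle = _mm_set1_epi8 ((char) c);
      for (;; p += 16)
        {
          __m128i v = _mm_load_si128 ((const __m128i *) p); /* movdqa */
          int m = _mm_movemask_epi8 (_mm_cmpeq_epi8 (v, needle));
          if (m)
            return p + __builtin_ctz (m);   /* bsf */
        }
    }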
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
index 355ed4e674..1958b36360 100644
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -222,6 +222,12 @@ L(ascii):
test REM, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+#endif
mov %dx, %cx
and $0xfff, %cx
cmp $0xff0, %cx
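The and $0xfff / cmp $0xff0 sequence just after the new bounds loads
asks whether the pointer sits within 16 bytes of the end of a 4 KiB
page, where a full 16-byte load could fault on the following page and
the code must take a more careful path.  The same test in C:

    #include <stdint.h>

    /* Nonzero if a 16-byte load at P could touch the next 4 KiB page. */
    static inline int
    may_cross_page (const void *p)
    {
      return ((uintptr_t) p & 0xfff) > 0xff0;  /* > 4080 into the page */
    }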
@@ -280,6 +286,10 @@ L(ascii):
add $16, %edx
add $16, %eax
L(first4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
@@ -303,6 +313,10 @@ L(first4bytes):
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
+#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 1(%edx), %edi
@@ -325,6 +339,10 @@ L(first4bytes):
cmp $2, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
+#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 2(%edx), %edi
@@ -347,6 +365,10 @@ L(first4bytes):
cmp $3, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
+#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 3(%edx), %edi
@@ -369,6 +391,10 @@ L(first4bytes):
cmp $4, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
+#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 4(%edx), %edi
@@ -391,6 +417,10 @@ L(first4bytes):
cmp $5, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
+#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 5(%edx), %edi
@@ -413,6 +443,10 @@ L(first4bytes):
cmp $6, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
+#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 6(%edx), %edi
@@ -435,6 +469,10 @@ L(first4bytes):
cmp $7, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
+#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 7(%edx), %edi
@@ -483,6 +521,10 @@ L(check_offset):
testl %edx, %edx
jg L(crosspage)
L(loop):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movdqu (%esi,%edx), %xmm2
movdqu (%edi,%edx), %xmm1
TOLOWER (%xmm2, %xmm1)
@@ -497,6 +539,10 @@ L(loop):
add $16, %edx
jle L(loop)
L(crosspage):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movzbl (%edi,%edx), %eax
movzbl (%esi,%edx), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -625,6 +671,10 @@ L(less16bytes):
add $8, %eax
L(less4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
@@ -647,6 +697,10 @@ L(less4bytes):
cmp $1, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
+#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 1(%edx), %edi
@@ -670,6 +724,10 @@ L(less4bytes):
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
+#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 2(%edx), %edi
@@ -692,6 +750,10 @@ L(less4bytes):
cmp $3, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
+#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 3(%edx), %edi
@@ -715,6 +777,10 @@ L(more4bytes):
cmp $4, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
+#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 4(%edx), %edi
@@ -738,6 +804,10 @@ L(more4bytes):
cmp $5, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
+#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 5(%edx), %edi
@@ -760,6 +830,10 @@ L(more4bytes):
cmp $6, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
+#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 6(%edx), %edi
@@ -782,6 +856,10 @@ L(more4bytes):
cmp $7, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
+#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 7(%edx), %edi
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
index d942ac2852..4fdf7e0de1 100644
--- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
@@ -85,6 +85,14 @@ ENTRY (STRCPY)
movl LEN(%esp), %ebx
test %ebx, %ebx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edi,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+ bndcl (%edi), %bnd0
+ bndcu -1(%edi, %ebx), %bnd0
+# endif
mov %esi, %ecx
# ifndef USE_AS_STPCPY
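Note the asymmetry in the block above: the source is re-checked as the
scan advances, but because strncpy's destination length %ebx is known
on entry, bndcu -1(%edi, %ebx) validates the last byte the routine may
ever write just once, up front.  Conceptually, reusing the bnd_check
helper sketched earlier (DST_BOUNDS, DST and N are hypothetical names
for the caller's values):

    #include <stddef.h>

    /* Up-front destination check for an N-byte strncpy-style write.  */
    static inline void
    check_dst_range (struct bounds dst_bounds, char *dst, size_t n)
    {
      bnd_check (dst_bounds, dst);           /* bndcl (%edi), %bnd0 */
      bnd_check (dst_bounds, dst + n - 1);   /* bndcu -1(%edi, %ebx) */
    }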
@@ -111,6 +119,9 @@ ENTRY (STRCPY)
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
@@ -124,6 +135,9 @@ ENTRY (STRCPY)
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm1, (%edi)
sub %ecx, %edi
@@ -132,6 +146,10 @@ ENTRY (STRCPY)
.p2align 4
L(Unalign16Both):
mov $16, %ecx
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
@@ -143,6 +161,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -153,6 +175,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%edi, %ecx)
pcmpeqb %xmm4, %xmm0
@@ -163,6 +189,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm4)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%edi, %ecx)
pcmpeqb %xmm1, %xmm0
@@ -173,6 +203,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm1)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
pcmpeqb %xmm2, %xmm0
@@ -183,6 +217,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -193,6 +231,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqu %xmm3, (%edi, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
@@ -202,6 +244,9 @@ L(Unalign16Both):
lea 128(%ebx, %edx), %ebx
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
@@ -220,6 +265,10 @@ L(Unaligned64Loop):
L(Unaligned64Loop_start):
add $64, %edi
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu (%edi), %bnd0
+# endif
movdqu %xmm4, -64(%edi)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
@@ -259,15 +308,27 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
+# endif
movdqu %xmm4, (%edi)
movdqu %xmm5, 16(%edi)
movdqu %xmm6, 32(%edi)
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%edi, %edx), %bnd0
+# endif
lea 48(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 63(%edi), %bnd0
+# endif
movdqu %xmm7, 48(%edi)
add $15, %ebx
sub %edx, %ebx
+# ifdef __CHKP__
+ bndcu 49(%edi, %edx), %bnd0
+# endif
lea 49(%edi, %edx), %edi
jmp L(StrncpyFillTailWithZero)
@@ -288,6 +349,10 @@ L(SourceStringAlignmentZero):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
movdqu %xmm1, (%edi)
pmovmskb %xmm0, %edx
@@ -313,7 +378,7 @@ L(CopyFrom1To16BytesTail):
bsf %edx, %edx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
- .p2align 4
+ .p2align 9
L(CopyFrom1To32Bytes1):
add $16, %esi
add $16, %edi
@@ -337,6 +402,9 @@ L(CopyFrom1To16BytesUnaligned_0):
# ifdef USE_AS_STPCPY
lea (%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm4, (%edi)
add $63, %ebx
sub %edx, %ebx
@@ -350,6 +418,9 @@ L(CopyFrom1To16BytesUnaligned_16):
# ifdef USE_AS_STPCPY
lea 16(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
+# endif
movdqu %xmm5, 16(%edi)
add $47, %ebx
sub %edx, %ebx
@@ -364,6 +435,9 @@ L(CopyFrom1To16BytesUnaligned_32):
# ifdef USE_AS_STPCPY
lea 32(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
+# endif
movdqu %xmm6, 32(%edi)
add $31, %ebx
sub %edx, %ebx
@@ -495,6 +569,9 @@ L(Exit1):
.p2align 4
L(Exit2):
movw (%esi), %dx
+# ifdef __CHKP__
+ bndcu 1(%edi), %bnd0
+# endif
movw %dx, (%edi)
# ifdef USE_AS_STPCPY
lea 1(%edi), %eax
@@ -507,6 +584,9 @@ L(Exit2):
.p2align 4
L(Exit3):
movw (%esi), %cx
+# ifdef __CHKP__
+ bndcu 2(%edi), %bnd0
+# endif
movw %cx, (%edi)
movb %dh, 2(%edi)
# ifdef USE_AS_STPCPY
@@ -520,6 +600,9 @@ L(Exit3):
.p2align 4
L(Exit4):
movl (%esi), %edx
+# ifdef __CHKP__
+ bndcu 3(%edi), %bnd0
+# endif
movl %edx, (%edi)
# ifdef USE_AS_STPCPY
lea 3(%edi), %eax
@@ -532,6 +615,9 @@ L(Exit4):
.p2align 4
L(Exit5):
movl (%esi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%edi), %bnd0
+# endif
movb %dh, 4(%edi)
movl %ecx, (%edi)
# ifdef USE_AS_STPCPY
@@ -546,6 +632,9 @@ L(Exit5):
L(Exit6):
movl (%esi), %ecx
movw 4(%esi), %dx
+# ifdef __CHKP__
+ bndcu 5(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movw %dx, 4(%edi)
# ifdef USE_AS_STPCPY
@@ -560,6 +649,9 @@ L(Exit6):
L(Exit7):
movl (%esi), %ecx
movl 3(%esi), %edx
+# ifdef __CHKP__
+ bndcu 6(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movl %edx, 3(%edi)
# ifdef USE_AS_STPCPY
@@ -573,6 +665,9 @@ L(Exit7):
.p2align 4
L(Exit8):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 7(%edi), %eax
@@ -585,6 +680,9 @@ L(Exit8):
.p2align 4
L(Exit9):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 8(%edi), %bnd0
+# endif
movb %dh, 8(%edi)
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
@@ -599,6 +697,9 @@ L(Exit9):
L(Exit10):
movlpd (%esi), %xmm0
movw 8(%esi), %dx
+# ifdef __CHKP__
+ bndcu 9(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movw %dx, 8(%edi)
# ifdef USE_AS_STPCPY
@@ -613,6 +714,9 @@ L(Exit10):
L(Exit11):
movlpd (%esi), %xmm0
movl 7(%esi), %edx
+# ifdef __CHKP__
+ bndcu 10(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 7(%edi)
# ifdef USE_AS_STPCPY
@@ -627,6 +731,9 @@ L(Exit11):
L(Exit12):
movlpd (%esi), %xmm0
movl 8(%esi), %edx
+# ifdef __CHKP__
+ bndcu 11(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 8(%edi)
# ifdef USE_AS_STPCPY
@@ -641,6 +748,9 @@ L(Exit12):
L(Exit13):
movlpd (%esi), %xmm0
movlpd 5(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 12(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 5(%edi)
# ifdef USE_AS_STPCPY
@@ -655,6 +765,9 @@ L(Exit13):
L(Exit14):
movlpd (%esi), %xmm0
movlpd 6(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 13(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 6(%edi)
# ifdef USE_AS_STPCPY
@@ -669,6 +782,9 @@ L(Exit14):
L(Exit15):
movlpd (%esi), %xmm0
movlpd 7(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 14(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 7(%edi)
# ifdef USE_AS_STPCPY
@@ -682,6 +798,9 @@ L(Exit15):
.p2align 4
L(Exit16):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 15(%edi), %eax
@@ -694,6 +813,9 @@ L(Exit16):
.p2align 4
L(Exit17):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movb %dh, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -708,6 +830,9 @@ L(Exit17):
L(Exit18):
movdqu (%esi), %xmm0
movw 16(%esi), %cx
+# ifdef __CHKP__
+ bndcu 17(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movw %cx, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -722,6 +847,9 @@ L(Exit18):
L(Exit19):
movdqu (%esi), %xmm0
movl 15(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -736,6 +864,9 @@ L(Exit19):
L(Exit20):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -750,6 +881,9 @@ L(Exit20):
L(Exit21):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
movb %dh, 20(%edi)
@@ -765,6 +899,9 @@ L(Exit21):
L(Exit22):
movdqu (%esi), %xmm0
movlpd 14(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 21(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 14(%edi)
# ifdef USE_AS_STPCPY
@@ -779,6 +916,9 @@ L(Exit22):
L(Exit23):
movdqu (%esi), %xmm0
movlpd 15(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 22(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -793,6 +933,9 @@ L(Exit23):
L(Exit24):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 23(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -807,6 +950,9 @@ L(Exit24):
L(Exit25):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 24(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movb %dh, 24(%edi)
@@ -823,6 +969,9 @@ L(Exit26):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movw 24(%esi), %cx
+# ifdef __CHKP__
+ bndcu 25(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movw %cx, 24(%edi)
@@ -839,6 +988,9 @@ L(Exit27):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 23(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 23(%edi)
@@ -855,6 +1007,9 @@ L(Exit28):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 24(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 24(%edi)
@@ -870,6 +1025,9 @@ L(Exit28):
L(Exit29):
movdqu (%esi), %xmm0
movdqu 13(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 13(%edi)
# ifdef USE_AS_STPCPY
@@ -884,6 +1042,9 @@ L(Exit29):
L(Exit30):
movdqu (%esi), %xmm0
movdqu 14(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 14(%edi)
# ifdef USE_AS_STPCPY
@@ -899,6 +1060,9 @@ L(Exit30):
L(Exit31):
movdqu (%esi), %xmm0
movdqu 15(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -913,6 +1077,9 @@ L(Exit31):
L(Exit32):
movdqu (%esi), %xmm0
movdqu 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -1612,37 +1779,90 @@ ENTRY (STRCPY)
ENTRANCE
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+# endif
cmpb $0, (%ecx)
jz L(ExitTail1)
+# ifdef __CHKP__
+ bndcu 1(%ecx), %bnd1
+# endif
cmpb $0, 1(%ecx)
jz L(ExitTail2)
+# ifdef __CHKP__
+ bndcu 2(%ecx), %bnd1
+# endif
cmpb $0, 2(%ecx)
jz L(ExitTail3)
+# ifdef __CHKP__
+ bndcu 3(%ecx), %bnd1
+# endif
cmpb $0, 3(%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmpb $0, 4(%ecx)
jz L(ExitTail5)
+# ifdef __CHKP__
+ bndcu 5(%ecx), %bnd1
+# endif
cmpb $0, 5(%ecx)
jz L(ExitTail6)
+# ifdef __CHKP__
+ bndcu 6(%ecx), %bnd1
+# endif
cmpb $0, 6(%ecx)
jz L(ExitTail7)
+# ifdef __CHKP__
+ bndcu 7(%ecx), %bnd1
+# endif
cmpb $0, 7(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmpb $0, 8(%ecx)
jz L(ExitTail9)
+# ifdef __CHKP__
+ bndcu 9(%ecx), %bnd1
+# endif
cmpb $0, 9(%ecx)
jz L(ExitTail10)
+# ifdef __CHKP__
+ bndcu 10(%ecx), %bnd1
+# endif
cmpb $0, 10(%ecx)
jz L(ExitTail11)
+# ifdef __CHKP__
+ bndcu 11(%ecx), %bnd1
+# endif
cmpb $0, 11(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmpb $0, 12(%ecx)
jz L(ExitTail13)
+# ifdef __CHKP__
+ bndcu 13(%ecx), %bnd1
+# endif
cmpb $0, 13(%ecx)
jz L(ExitTail14)
+# ifdef __CHKP__
+ bndcu 14(%ecx), %bnd1
+# endif
cmpb $0, 14(%ecx)
jz L(ExitTail15)
+# ifdef __CHKP__
+ bndcu 15(%ecx), %bnd1
+# endif
cmpb $0, 15(%ecx)
jz L(ExitTail16)
@@ -1654,6 +1874,9 @@ ENTRY (STRCPY)
and $-16, %ebx
pxor %xmm0, %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pcmpeqb (%ebx), %xmm0
pmovmskb %xmm0, %eax
@@ -1669,6 +1892,10 @@ ENTRY (STRCPY)
xor %ebx, %ebx
.p2align 4
+# ifdef __CHKP__
+ bndcu 16(%ecx), %bnd1
+ bndcu 15(%edx), %bnd0
+# endif
movdqa (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movdqu %xmm1, (%edx)
@@ -1678,6 +1905,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
@@ -1686,6 +1917,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm4
movdqu %xmm3, (%edx, %ebx)
pcmpeqb %xmm4, %xmm0
@@ -1694,6 +1929,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm1
movdqu %xmm4, (%edx, %ebx)
pcmpeqb %xmm1, %xmm0
@@ -1702,6 +1941,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm2
movdqu %xmm1, (%edx, %ebx)
pcmpeqb %xmm2, %xmm0
@@ -1710,6 +1953,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
@@ -1718,6 +1965,9 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movdqu %xmm3, (%edx, %ebx)
mov %ecx, %eax
lea 16(%ecx, %ebx), %ecx
@@ -1726,6 +1976,9 @@ ENTRY (STRCPY)
sub %eax, %edx
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
@@ -1742,6 +1995,10 @@ L(Aligned64Loop):
test %eax, %eax
jnz L(Aligned64Leave)
L(Aligned64Loop_start):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu -1(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
movaps (%ecx), %xmm2
movdqa %xmm2, %xmm4
@@ -1771,6 +2028,9 @@ L(Aligned64Leave):
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
@@ -1778,11 +2038,17 @@ L(Aligned64Leave):
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movdqu %xmm5, -48(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movdqu %xmm6, -32(%edx)
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %eax
@@ -1813,6 +2079,10 @@ L(CopyFrom1To16Bytes):
test $0x40, %al
jnz L(Exit7)
/* Exit 8 */
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1841,6 +2111,10 @@ L(ExitHigh):
test $0x40, %ah
jnz L(Exit15)
/* Exit 16 */
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
@@ -1854,6 +2128,10 @@ L(ExitHigh):
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%ecx), %bnd1
+# endif
movb (%ecx), %al
movb %al, (%edx)
# ifdef USE_AS_STPCPY
@@ -1865,6 +2143,10 @@ L(Exit1):
.p2align 4
L(Exit2):
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
@@ -1876,6 +2158,10 @@ L(Exit2):
.p2align 4
L(Exit3):
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
@@ -1889,6 +2175,10 @@ L(Exit3):
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
@@ -1900,6 +2190,10 @@ L(Exit4):
.p2align 4
L(Exit5):
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
@@ -1913,6 +2207,10 @@ L(Exit5):
.p2align 4
L(Exit6):
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
@@ -1926,6 +2224,10 @@ L(Exit6):
.p2align 4
L(Exit7):
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
@@ -1939,6 +2241,10 @@ L(Exit7):
.p2align 4
L(Exit9):
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+ bndcu 8(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1954,6 +2260,10 @@ L(Exit9):
.p2align 4
L(Exit10):
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+ bndcu 9(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1969,6 +2279,10 @@ L(Exit10):
.p2align 4
L(Exit11):
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+ bndcu 10(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1984,6 +2298,10 @@ L(Exit11):
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1999,6 +2317,10 @@ L(Exit12):
.p2align 4
L(Exit13):
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+ bndcu 12(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
@@ -2012,6 +2334,10 @@ L(Exit13):
.p2align 4
L(Exit14):
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+ bndcu 13(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
@@ -2025,6 +2351,10 @@ L(Exit14):
.p2align 4
L(Exit15):
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+ bndcu 14(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
@@ -2040,6 +2370,9 @@ CFI_POP (%edi)
.p2align 4
L(ExitTail1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movb (%ecx), %al
movb %al, (%edx)
movl %edx, %eax
@@ -2048,6 +2381,9 @@ L(ExitTail1):
.p2align 4
L(ExitTail2):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+# endif
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
lea 1(%edx), %eax
@@ -2059,6 +2395,9 @@ L(ExitTail2):
.p2align 4
L(ExitTail3):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+# endif
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
@@ -2072,6 +2411,9 @@ L(ExitTail3):
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
lea 3(%edx), %eax
@@ -2083,6 +2425,9 @@ L(ExitTail4):
.p2align 4
L(ExitTail5):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
@@ -2096,6 +2441,9 @@ L(ExitTail5):
.p2align 4
L(ExitTail6):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+# endif
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
@@ -2109,6 +2457,9 @@ L(ExitTail6):
.p2align 4
L(ExitTail7):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
@@ -2122,6 +2473,9 @@ L(ExitTail7):
.p2align 4
L(ExitTail8):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2135,6 +2489,9 @@ L(ExitTail8):
.p2align 4
L(ExitTail9):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2150,6 +2507,9 @@ L(ExitTail9):
.p2align 4
L(ExitTail10):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2165,6 +2525,9 @@ L(ExitTail10):
.p2align 4
L(ExitTail11):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2180,6 +2543,9 @@ L(ExitTail11):
.p2align 4
L(ExitTail12):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2195,6 +2561,9 @@ L(ExitTail12):
.p2align 4
L(ExitTail13):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
@@ -2208,6 +2577,9 @@ L(ExitTail13):
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
@@ -2221,6 +2593,9 @@ L(ExitTail14):
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
@@ -2234,6 +2609,9 @@ L(ExitTail15):
.p2align 4
L(ExitTail16):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
movlpd %xmm0, 8(%edx)
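
The checking convention throughout these strcpy/stpcpy hunks is uniform: bndldx reloads the bounds that belong to each pointer argument (the bound table is indexed by the stack slot holding the pointer and by the pointer value itself), bndcl checks the first byte, and before every N-byte store to the destination a bndcu on (N-1)(%edx) checks the last byte written. The C below is only a rough model of what one bndcl/bndcu pair enforces; the struct and function names are illustrative, not part of the patch, and real MPX raises a #BR exception rather than calling anything:

    /* Illustrative model only; assumes n >= 1.  Hardware MPX keeps
       lower/upper in a bound register loaded by bndldx.  */
    struct bounds { const char *lower, *upper; };   /* upper is inclusive */

    static void
    check_store (struct bounds b, const char *dst, unsigned int n)
    {
      if (dst < b.lower)              /* bndcl (%edx), %bnd0 */
        __builtin_trap ();
      if (dst + n - 1 > b.upper)      /* bndcu N-1(%edx), %bnd0 */
        __builtin_trap ();
    }
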
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
index 32db65cbd2..ab537c1f41 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
@@ -41,6 +41,11 @@
ENTRY ( __strlen_sse2_bsf)
ENTRANCE
mov STR(%esp), %edi
+#ifdef __CHKP__
+ bndldx STR(%esp,%edi,1), %bnd0
+ bndcl (%edi),%bnd0
+ bndcu (%edi),%bnd0
+#endif
xor %eax, %eax
mov %edi, %ecx
and $0x3f, %ecx
@@ -73,21 +78,33 @@ L(align16_start):
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+#ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+#endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+#ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+#endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+#ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+#endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+#ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+#endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -98,24 +115,36 @@ L(exit):
L(exit_less16):
bsf %edx, %edx
add %edx, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit16):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $16, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit32):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $32, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit48):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $48, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
POP (%edi)
POP (%esi)
ret
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
index a4f2806cfe..3d0743ec88 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2.S
@@ -41,7 +41,14 @@
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# undef RETURN
-# define RETURN POP (%edi); CFI_PUSH(%edi); ret
+# ifdef __CHKP__
+# define RETURN \
+ mov STR+4(%esp),%edx; \
+ bndcu -1(%edx,%eax), %bnd0; \
+ POP (%edi); CFI_PUSH(%edi); ret
+# else
+# define RETURN POP (%edi); CFI_PUSH(%edi); ret
+# endif
# endif
# ifndef STRLEN
@@ -51,12 +54,19 @@
atom_text_section
ENTRY (STRLEN)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+# endif
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
# endif
xor %eax, %eax
cmpb $0, (%edx)
@@ -122,6 +132,9 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -129,6 +142,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -136,6 +152,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -143,6 +162,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -154,24 +176,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -183,24 +217,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -212,24 +258,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -250,6 +308,9 @@ L(aligned_64_loop):
sub $64, %edi
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
@@ -535,6 +596,10 @@ L(len_less4_prolog):
add $4, %edi
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
index e026c40683..1c907a46de 100644
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
@@ -42,6 +42,12 @@ ENTRY (__strrchr_sse2_bsf)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
PUSH (%edi)
pxor %xmm2, %xmm2
mov %ecx, %edi
@@ -90,6 +96,9 @@ L(unaligned_return_value1):
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
@@ -156,6 +165,9 @@ L(unaligned_return_value):
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
@@ -175,6 +187,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -184,6 +199,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -193,6 +211,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -202,6 +223,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -224,6 +248,9 @@ L(return_value):
POP (%esi)
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
@@ -255,6 +282,9 @@ L(return_value_1):
bsr %eax, %eax
add %edi, %eax
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
index 63101d9c9f..e06274aecf 100644
--- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
@@ -40,7 +40,11 @@ ENTRY (__wcschr_sse2)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
-
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %eax
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
@@ -90,6 +94,9 @@ L(cross_cache):
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
CFI_PUSH (%edi)
@@ -108,6 +115,9 @@ L(unaligned_no_match):
.p2align 4
L(loop):
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -117,6 +127,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -126,6 +139,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -135,6 +151,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -160,11 +179,17 @@ L(match_case2):
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_4):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -176,11 +201,17 @@ L(match_higth_case2):
test $15, %dh
jnz L(return_null)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_12):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -191,6 +222,9 @@ L(match_case1):
test $0x01, %al
jnz L(exit0)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -198,16 +232,25 @@ L(match_higth_case1):
test $0x01, %ah
jnz L(exit3)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit0):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit3):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
index 9b248c1073..108e7fb914 100644
--- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
@@ -47,6 +47,14 @@ ENTRY (__wcscmp_sse2)
*/
mov STR1(%esp), %edx
mov STR2(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
mov (%eax), %ecx
cmp %ecx, (%edx)
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
index 47fb5164b3..708ef41040 100644
--- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
@@ -41,13 +41,29 @@
ENTRY (__wcscpy_ssse3)
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+# endif
cmp $0, (%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmp $0, 4(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmp $0, 8(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmp $0, 12(%ecx)
jz L(ExitTail16)
@@ -61,6 +77,9 @@ ENTRY (__wcscpy_ssse3)
pxor %xmm0, %xmm0
pcmpeqd (%esi), %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pmovmskb %xmm0, %eax
@@ -87,6 +106,10 @@ ENTRY (__wcscpy_ssse3)
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 16(%ecx), %bnd1
+# endif
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
@@ -97,6 +120,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
@@ -106,6 +133,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqd %xmm4, %xmm0
@@ -115,6 +146,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqd %xmm1, %xmm0
@@ -124,6 +159,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqd %xmm2, %xmm0
@@ -133,6 +172,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
@@ -142,6 +185,9 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+# endif
movaps %xmm3, (%edx, %esi)
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
@@ -152,6 +198,9 @@ L(Align16Both):
mov $-0x40, %esi
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
@@ -168,6 +217,9 @@ L(Aligned64Loop):
test %eax, %eax
jnz L(Aligned64Leave)
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
@@ -182,6 +234,9 @@ L(Aligned64Leave):
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
test %eax, %eax
lea 16(%esi), %esi
@@ -189,11 +244,17 @@ L(Aligned64Leave):
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movaps %xmm5, -48(%edx)
test %eax, %eax
lea 16(%esi), %esi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movaps %xmm6, -32(%edx)
pcmpeqd %xmm7, %xmm0
pmovmskb %xmm0, %eax
@@ -202,11 +263,17 @@ L(Aligned64Leave):
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %esi
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
@@ -218,6 +285,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -231,6 +302,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -244,6 +319,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -256,6 +335,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@@ -269,6 +352,9 @@ L(Shl4Start):
movaps -4(%ecx), %xmm1
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -290,6 +376,9 @@ L(Shl4LoopStart):
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -300,6 +389,9 @@ L(Shl4LoopStart):
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
@@ -310,6 +403,9 @@ L(Shl4LoopExit):
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
@@ -318,6 +414,9 @@ L(Shl4LoopExit):
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
@@ -329,6 +428,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -342,6 +445,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -355,6 +462,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -367,6 +478,9 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@@ -380,6 +494,9 @@ L(Shl8Start):
movaps -8(%ecx), %xmm1
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -401,6 +518,9 @@ L(Shl8LoopStart):
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -410,6 +530,9 @@ L(Shl8LoopStart):
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
@@ -419,6 +542,9 @@ L(Shl8LoopExit):
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
@@ -427,6 +553,9 @@ L(Shl8LoopExit):
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
@@ -438,6 +567,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -451,6 +584,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -464,6 +601,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -476,6 +617,9 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@@ -489,6 +633,9 @@ L(Shl12Start):
movaps -12(%ecx), %xmm1
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -510,6 +657,9 @@ L(Shl12LoopStart):
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -519,6 +669,9 @@ L(Shl12LoopStart):
L(Shl12LoopExit):
movl (%ecx), %esi
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %esi, (%edx)
mov $4, %esi
@@ -533,6 +686,10 @@ L(CopyFrom1To16Bytes):
test $0x01, %al
jnz L(Exit4)
L(Exit8):
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
@@ -543,6 +700,10 @@ L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
L(Exit16):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
@@ -550,6 +711,10 @@ L(Exit16):
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
@@ -557,6 +722,10 @@ L(Exit4):
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
@@ -569,6 +738,9 @@ CFI_POP (%edi)
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl %edx, %eax
ret
@@ -576,6 +748,9 @@ L(ExitTail4):
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edx, %eax
ret
@@ -583,6 +758,9 @@ L(ExitTail8):
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
@@ -592,6 +770,9 @@ L(ExitTail12):
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm0, (%edx)
movl %edx, %eax
ret
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
index a92b92ffd3..9c53149261 100644
--- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
@@ -24,21 +24,47 @@
.text
ENTRY (__wcslen_sse2)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmp $0, (%edx)
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
cmp $0, 4(%edx)
jz L(exit_tail1)
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
cmp $0, 8(%edx)
jz L(exit_tail2)
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
cmp $0, 12(%edx)
jz L(exit_tail3)
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
cmp $0, 16(%edx)
jz L(exit_tail4)
+# ifdef __CHKP__
+ bndcu 20(%edx), %bnd0
+# endif
cmp $0, 20(%edx)
jz L(exit_tail5)
+# ifdef __CHKP__
+ bndcu 24(%edx), %bnd0
+# endif
cmp $0, 24(%edx)
jz L(exit_tail6)
+# ifdef __CHKP__
+ bndcu 28(%edx), %bnd0
+# endif
cmp $0, 28(%edx)
jz L(exit_tail7)
@@ -48,6 +74,9 @@ ENTRY (__wcslen_sse2)
lea 16(%edx), %ecx
and $-16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -55,6 +84,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -62,6 +94,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -69,6 +104,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -79,6 +117,9 @@ ENTRY (__wcslen_sse2)
.p2align 4
L(aligned_64_loop):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
@@ -129,6 +170,10 @@ L(exit):
mov %dl, %cl
and $15, %cl
jz L(exit_1)
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
@@ -137,16 +182,28 @@ L(exit_high):
and $15, %ch
jz L(exit_3)
add $2, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_1):
add $1, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_3):
add $3, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
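
In the wcslen exit paths just above, %eax holds the result in wide characters, so each exit reloads the start pointer and verifies the last byte of the final character, -1(%edx,%eax,4), against the upper bound. A sketch in the model introduced earlier (wchar_t is 4 bytes on i386; the names are illustrative):

    /* Sketch: len is in wchar_t units, so the last byte examined is
       str + 4 * len - 1, matching bndcu -1(%edx,%eax,4), %bnd0.  */
    static void
    check_wcslen_result (struct bounds b, const char *str, unsigned int len)
    {
      if (str + 4 * len - 1 > b.upper)
        __builtin_trap ();
    }
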
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
index d31e48e43f..f7c70e6de8 100644
--- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
@@ -36,12 +36,23 @@
# define STR1 PARMS
# define STR2 STR1+4
+# ifdef __CHKP__
+# undef RETURN
+# define RETURN bndcu (%eax),%bnd0; \
+ POP (%edi); ret; CFI_PUSH (%edi);
+# endif
+
atom_text_section
ENTRY (__wcsrchr_sse2)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %edi
punpckldq %xmm1, %xmm1
@@ -137,6 +148,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %edi
@@ -146,6 +160,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %edi
@@ -155,6 +172,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %edi
@@ -164,6 +184,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %edi
diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S
index 6ca6220a02..67134af471 100644
--- a/sysdeps/i386/i686/strcmp.S
+++ b/sysdeps/i386/i686/strcmp.S
@@ -29,8 +29,19 @@ ENTRY (strcmp)
movl STR1(%esp), %ecx
movl STR2(%esp), %edx
-
-L(oop): movb (%ecx), %al
+#ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndldx STR2(%esp,%edx,1), %bnd1
+ bndcl (%ecx), %bnd0
+ bndcl (%edx), %bnd1
+#endif
+
+L(oop):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd0
+ bndcu (%edx), %bnd1
+#endif
+ movb (%ecx), %al
cmpb (%edx), %al
jne L(neq)
incl %ecx
diff --git a/sysdeps/i386/i686/strtok.S b/sysdeps/i386/i686/strtok.S
index 8848faf4d9..78a2ea9fce 100644
--- a/sysdeps/i386/i686/strtok.S
+++ b/sysdeps/i386/i686/strtok.S
@@ -121,6 +121,14 @@ ENTRY (FUNCTION)
testl %edx, %edx
jz L(returnNULL)
movl DELIM(%esp), %eax /* Get start of delimiter set. */
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1),%bnd0
+ bndldx DELIM(%esp,%eax,1),%bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%eax), %bnd1
+#endif
/* For understanding the following code remember that %ecx == 0 now.
Although all the following instructions only modify %cl we always
diff --git a/sysdeps/i386/memchr.S b/sysdeps/i386/memchr.S
index 67995002ed..39fe616974 100644
--- a/sysdeps/i386/memchr.S
+++ b/sysdeps/i386/memchr.S
@@ -51,6 +51,11 @@ ENTRY (__memchr)
movl LEN(%esp), %esi /* len: length of memory block. */
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* If we must not test more than three characters, test
them one by one. This is especially true for 0. */
cmpl $4, %esi
@@ -72,6 +77,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -80,6 +88,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -88,6 +99,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -127,7 +141,11 @@ ENTRY (__memchr)
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -162,6 +180,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -176,6 +197,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -190,6 +214,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -213,6 +240,9 @@ L(2): subl $16, %esi
cmpl $4-16, %esi /* rest < 4 bytes? */
jb L(3) /* yes, then test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -231,6 +261,9 @@ L(2): subl $16, %esi
cmpl $8-16, %esi /* rest < 8 bytes? */
jb L(3) /* yes, then test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -249,6 +282,9 @@ L(2): subl $16, %esi
cmpl $12-16, %esi /* rest < 12 bytes? */
jb L(3) /* yes, then test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -268,18 +304,27 @@ L(2): subl $16, %esi
L(3): andl $3, %esi /* mask out uninteresting bytes */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, then return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, then return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, then return pointer */
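
The magic value 0xfefefeff that memchr (and rawmemchr, strchrnul and strrchr below) relies on is the classic carry-propagation test: after XORing the loaded word with c|c|c|c, adding the magic value produces a carry out of every byte that is not zero, so a missing carry betrays a byte equal to CHR. The added bndcu lines simply validate each 4-byte load before the trick runs. A minimal C sketch of the coarse test (the assembly then inspects the four bytes individually to locate the match; the function name is illustrative):

    /* Coarse zero-byte test on a 4-byte word, as in the assembly above.
       `word' is the loaded dword already XORed with c|c|c|c, so a zero
       byte here means a byte equal to CHR in the original data.  */
    static int
    may_have_zero_byte (unsigned int word)
    {
      unsigned int magic = 0xfefefeffU;
      unsigned int sum = word + magic;
      if (sum < word)                              /* carry out of the top byte */
        return (((sum ^ word) | magic) + 1) != 0;  /* zero in a lower byte?  */
      return 1;                                    /* no carry: zero byte present */
    }
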
diff --git a/sysdeps/i386/memcmp.S b/sysdeps/i386/memcmp.S
index 21e0bfcd1c..7beab65325 100644
--- a/sysdeps/i386/memcmp.S
+++ b/sysdeps/i386/memcmp.S
@@ -37,6 +37,12 @@ ENTRY (memcmp)
cfi_rel_offset (esi, 0)
movl BLK2(%esp), %edi
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%esi,1), %bnd0
+ bndldx BLK2(%esp,%edi,1), %bnd1
+ bndcl (%esi), %bnd0
+ bndcl (%edi), %bnd1
+#endif
cld /* Set direction of comparison. */
@@ -59,7 +65,13 @@ ENTRY (memcmp)
Note that the following operation does not change 0xffffffff. */
orb $1, %al /* Change 0 to 1. */
-L(1): popl %esi /* Restore registers. */
+L(1):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edi), %bnd1
+#endif
+ popl %esi /* Restore registers. */
+
cfi_adjust_cfa_offset (-4)
cfi_restore (esi)
movl %edx, %edi
diff --git a/sysdeps/i386/rawmemchr.S b/sysdeps/i386/rawmemchr.S
index 2bd20e0459..27441ddf73 100644
--- a/sysdeps/i386/rawmemchr.S
+++ b/sysdeps/i386/rawmemchr.S
@@ -46,6 +46,11 @@ ENTRY (__rawmemchr)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* At the moment %edx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
operations on 16 bit words because these require an
@@ -62,18 +67,27 @@ ENTRY (__rawmemchr)
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -108,7 +122,11 @@ ENTRY (__rawmemchr)
/* Each round the main loop processes 16 bytes. */
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -143,6 +161,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -157,6 +178,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -171,6 +195,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -211,6 +238,9 @@ L(8): testb %cl, %cl /* test first byte in dword */
/* No further test needed; we know it is one of the four bytes. */
L(9):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
popl %edi /* pop saved register */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
diff --git a/sysdeps/i386/stpncpy.S b/sysdeps/i386/stpncpy.S
index b23e8208a0..22d727a356 100644
--- a/sysdeps/i386/stpncpy.S
+++ b/sysdeps/i386/stpncpy.S
@@ -42,6 +42,14 @@ ENTRY (__stpncpy)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 0)
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%eax,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcu -1(%eax, %ecx), %bnd0
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+#endif
subl %eax, %esi /* magic: reduce number of loop variants
to one using addressing mode */
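
stpncpy is handled differently from the routines above: since it always writes exactly LEN bytes to DEST (zero-padding after the terminator), the whole destination range can be validated once at entry with a single bndcl/bndcu pair instead of before each store. A sketch of that entry check in the earlier model; note that with LEN == 0 the expression DEST+LEN-1 wraps below DEST, a corner this form of the check does not cover:

    /* Whole-range check at entry; stpncpy writes all n bytes of dest.
       As in the assembly, n == 0 is not special-cased, so the n - 1
       below would underflow for an empty copy.  */
    static void
    check_dest_range (struct bounds b, const char *dest, unsigned int n)
    {
      if (dest < b.lower)              /* bndcl (%eax), %bnd0 */
        __builtin_trap ();
      if (dest + n - 1 > b.upper)      /* bndcu -1(%eax,%ecx), %bnd0 */
        __builtin_trap ();
    }
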
diff --git a/sysdeps/i386/strchrnul.S b/sysdeps/i386/strchrnul.S
index 7ceb88ed8b..86bf770aef 100644
--- a/sysdeps/i386/strchrnul.S
+++ b/sysdeps/i386/strchrnul.S
@@ -38,6 +38,11 @@ ENTRY (__strchrnul)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
/* At the moment %edx contains CHR. What we need for the
algorithm is CHR in all bytes of the dword. Avoid
operations on 16 bit words because these require an
@@ -60,6 +65,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -69,6 +77,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -78,6 +89,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -120,7 +134,11 @@ ENTRY (__strchrnul)
L(1): addl $16, %eax /* adjust pointer for whole round */
-L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(11):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
movl $0xfefefeff, %edi /* magic value */
@@ -164,6 +182,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+# endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -189,6 +210,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(71) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+# endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -214,6 +238,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(72) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+# endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -268,7 +295,11 @@ L(7): testb %cl, %cl /* is first byte CHR? */
/* It must be in the fourth byte and it cannot be NUL. */
incl %eax
-L(6): popl %edi /* restore saved register content */
+L(6):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ popl %edi /* restore saved register content */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
diff --git a/sysdeps/i386/strcspn.S b/sysdeps/i386/strcspn.S
index 0c262d6001..1352b038f0 100644
--- a/sysdeps/i386/strcspn.S
+++ b/sysdeps/i386/strcspn.S
@@ -32,6 +32,14 @@ ENTRY (strcspn)
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strpbrk.S b/sysdeps/i386/strpbrk.S
index 246ae27c53..7190a064ac 100644
--- a/sysdeps/i386/strpbrk.S
+++ b/sysdeps/i386/strpbrk.S
@@ -33,6 +33,14 @@ ENTRY (strpbrk)
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strrchr.S b/sysdeps/i386/strrchr.S
index 31b8a4562c..858bba463c 100644
--- a/sysdeps/i386/strrchr.S
+++ b/sysdeps/i386/strrchr.S
@@ -40,6 +40,10 @@ ENTRY (strrchr)
movl STR(%esp), %esi
cfi_rel_offset (esi, 0)
movl CHR(%esp), %ecx
+#ifdef __CHKP__
+ bndldx STR(%esp,%esi,1), %bnd0
+ bndcl (%esi), %bnd0
+#endif
/* At the moment %ecx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
@@ -63,6 +67,9 @@ ENTRY (strrchr)
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(11) /* target found => return */
@@ -73,6 +80,9 @@ L(11): orb %dl, %dl /* is NUL? */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(12) /* target found => return */
@@ -83,6 +93,9 @@ L(12): orb %dl, %dl /* is NUL? */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(13) /* target found => return */
@@ -170,7 +183,11 @@ L(51):
L(1): addl $16, %esi /* increment pointer for full round */
-L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
+L(19):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
+ movl (%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
carry bits reported for each byte which
@@ -214,6 +231,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(3) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+#endif
movl 4(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
@@ -238,6 +258,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(31) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+#endif
movl 8(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
@@ -262,6 +285,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(32) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+#endif
movl 12(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
diff --git a/sysdeps/i386/strtok.S b/sysdeps/i386/strtok.S
index 79d540b603..cfee507da2 100644
--- a/sysdeps/i386/strtok.S
+++ b/sysdeps/i386/strtok.S
@@ -67,6 +67,11 @@ ENTRY (FUNCTION)
movl STR(%esp), %edx
movl DELIM(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx DELIM(%esp,%eax,1), %bnd1
+#endif
+
#if !defined USE_AS_STRTOK_R && defined PIC
pushl %ebx /* Save PIC register. */
cfi_adjust_cfa_offset (4)
@@ -336,6 +341,9 @@ L(11):
/* Store the pointer to the next character. */
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
@@ -351,6 +359,9 @@ L(returnNULL):
xorl %eax, %eax
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
jmp L(epilogue)