From 01d5454d13d2c21b9a08b28441d37a7ddce089a6 Mon Sep 17 00:00:00 2001
From: Liubov Dmitrieva
Date: Fri, 24 May 2013 13:18:17 +0400
Subject: Implement bounds-check support for string/memory routines for x86_32.

Warning: this work is not yet complete and has not been tested.
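Two idioms recur throughout the hunks below, so rough C models of both
are given here for reviewers.  In the first sketch the helper names
(load_bounds, check_lower, check_upper, bounds_violation) are invented
stand-ins for the bndldx/bndcl/bndcu instructions and the #BR fault;
they are illustration only, not a real API.

  #include <stddef.h>

  struct bounds { const char *lb, *ub; };

  extern struct bounds load_bounds (const void *p);  /* ~ bndldx */
  extern void bounds_violation (void);               /* ~ #BR fault */

  static void check_lower (struct bounds b, const void *p) /* ~ bndcl */
  { if ((const char *) p < b.lb) bounds_violation (); }

  static void check_upper (struct bounds b, const void *p) /* ~ bndcu */
  { if ((const char *) p > b.ub) bounds_violation (); }

  /* strlen with the check placement the patch uses: validate the first
     byte up front, then re-check the upper bound before every further
     load (the bndcu that precedes each mov in the hunks).  */
  size_t checked_strlen (const char *s)
  {
    struct bounds b = load_bounds (s);
    check_lower (b, s);
    check_upper (b, s);
    const char *p = s;
    while (*p)
      check_upper (b, ++p);    /* check before the next dereference */
    return (size_t) (p - s);
  }

The second idiom is the word-at-a-time NUL scan in the i486/i586
routines.  The magic value 0xfefefeff is -0x01010101 modulo 2^32, so
the carry-bit argument spelled out in the strcat.S comments reduces to
the classic zero-byte test, which in C form is:

  #include <stdint.h>

  /* Nonzero iff some byte of x is 0x00: subtracting 1 from each byte
     (x + 0xfefefeff == x - 0x01010101 mod 2^32) carries into bit 7
     only for bytes that were zero, once bytes >= 0x80 are masked off
     by ~x.  */
  static int has_zero_byte (uint32_t x)
  {
    return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
  }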
---
 sysdeps/i386/i486/strcat.S | 79 +-
 sysdeps/i386/i586/strchr.S | 31 +-
 sysdeps/i386/i586/strcpy.S | 7 +
 sysdeps/i386/i586/strlen.S | 22 +-
 sysdeps/i386/i686/memcmp.S | 172 +++
 sysdeps/i386/i686/memset.S | 5 +
 sysdeps/i386/i686/multiarch/Makefile | 7 +-
 sysdeps/i386/i686/multiarch/Versions | 7 +
 sysdeps/i386/i686/multiarch/__bcopy.S | 81 +
 sysdeps/i386/i686/multiarch/__memcpy.S | 82 ++
 sysdeps/i386/i686/multiarch/__memcpy_chk.S | 54 +
 sysdeps/i386/i686/multiarch/__memmove.S | 109 ++
 sysdeps/i386/i686/multiarch/__memmove_chk.S | 102 ++
 sysdeps/i386/i686/multiarch/__mempcpy.S | 85 ++
 sysdeps/i386/i686/multiarch/__mempcpy_chk.S | 54 +
 sysdeps/i386/i686/multiarch/bcopy.S | 81 -
 sysdeps/i386/i686/multiarch/bcopy.c | 7 +
 sysdeps/i386/i686/multiarch/ifunc-impl-list.c | 86 +-
 sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S | 82 +-
 sysdeps/i386/i686/multiarch/memcmp-sse4.S | 53 +
 sysdeps/i386/i686/multiarch/memcpy.S | 82 --
 sysdeps/i386/i686/multiarch/memcpy.c | 40 +
 sysdeps/i386/i686/multiarch/memcpy_chk.S | 54 -
 sysdeps/i386/i686/multiarch/memcpy_chk.c | 1 +
 sysdeps/i386/i686/multiarch/memmove.S | 109 --
 sysdeps/i386/i686/multiarch/memmove.c | 76 +
 sysdeps/i386/i686/multiarch/memmove_chk.S | 102 --
 sysdeps/i386/i686/multiarch/memmove_chk.c | 1 +
 sysdeps/i386/i686/multiarch/mempcpy.S | 85 --
 sysdeps/i386/i686/multiarch/mempcpy.c | 40 +
 sysdeps/i386/i686/multiarch/mempcpy_chk.S | 54 -
 sysdeps/i386/i686/multiarch/mempcpy_chk.c | 1 +
 sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S | 6 +
 sysdeps/i386/i686/multiarch/memset-sse2-rep.S | 6 +
 sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S | 1803 +++++++++++++++++++++++
 sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S | 3 +
 sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S | 3 +
 sysdeps/i386/i686/multiarch/strcat-sse2.S | 186 +++
 sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S | 24 +
 sysdeps/i386/i686/multiarch/strcmp-sse4.S | 78 +
 sysdeps/i386/i686/multiarch/strcpy-sse2.S | 380 ++++-
 sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S | 29 +
 sysdeps/i386/i686/multiarch/strlen-sse2.S | 67 +-
 sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S | 30 +
 sysdeps/i386/i686/multiarch/wcschr-sse2.S | 45 +-
 sysdeps/i386/i686/multiarch/wcscmp-sse2.S | 8 +
 sysdeps/i386/i686/multiarch/wcscpy-ssse3.S | 181 +++
 sysdeps/i386/i686/multiarch/wcslen-sse2.S | 57 +
 sysdeps/i386/i686/multiarch/wcsrchr-sse2.S | 23 +
 sysdeps/i386/i686/strcmp.S | 15 +-
 sysdeps/i386/i686/strtok.S | 8 +
 sysdeps/i386/memchr.S | 47 +-
 sysdeps/i386/memcmp.S | 14 +-
 sysdeps/i386/rawmemchr.S | 32 +-
 sysdeps/i386/stpncpy.S | 8 +
 sysdeps/i386/strchrnul.S | 35 +-
 sysdeps/i386/strcspn.S | 8 +
 sysdeps/i386/strpbrk.S | 8 +
 sysdeps/i386/strrchr.S | 28 +-
 sysdeps/i386/strtok.S | 11 +
 60 files changed, 4360 insertions(+), 634 deletions(-)
 create mode 100644 sysdeps/i386/i686/multiarch/__bcopy.S
 create mode 100644 sysdeps/i386/i686/multiarch/__memcpy.S
 create mode 100644 sysdeps/i386/i686/multiarch/__memcpy_chk.S
 create mode 100644 sysdeps/i386/i686/multiarch/__memmove.S
 create mode 100644 sysdeps/i386/i686/multiarch/__memmove_chk.S
 create mode 100644 sysdeps/i386/i686/multiarch/__mempcpy.S
 create mode 100644 sysdeps/i386/i686/multiarch/__mempcpy_chk.S
 delete mode 100644 sysdeps/i386/i686/multiarch/bcopy.S
 create mode 100644 sysdeps/i386/i686/multiarch/bcopy.c
 delete mode 100644 sysdeps/i386/i686/multiarch/memcpy.S
 create mode 100644 sysdeps/i386/i686/multiarch/memcpy.c
 delete mode 100644 sysdeps/i386/i686/multiarch/memcpy_chk.S
 create mode 100644 sysdeps/i386/i686/multiarch/memcpy_chk.c
 delete mode 100644 sysdeps/i386/i686/multiarch/memmove.S
 create mode 100644 sysdeps/i386/i686/multiarch/memmove.c
 delete mode 100644 sysdeps/i386/i686/multiarch/memmove_chk.S
 create mode 100644 sysdeps/i386/i686/multiarch/memmove_chk.c
 delete mode 100644 sysdeps/i386/i686/multiarch/mempcpy.S
 create mode 100644 sysdeps/i386/i686/multiarch/mempcpy.c
 delete mode 100644 sysdeps/i386/i686/multiarch/mempcpy_chk.S
 create mode 100644 sysdeps/i386/i686/multiarch/mempcpy_chk.c
 create mode 100644 sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
 create mode 100644 sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
 create mode 100644 sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S

diff --git a/sysdeps/i386/i486/strcat.S b/sysdeps/i386/i486/strcat.S
index 7d45862a2a..af2602efd6 100644
--- a/sysdeps/i386/i486/strcat.S
+++ b/sysdeps/i386/i486/strcat.S
@@ -35,9 +35,19 @@ ENTRY (strcat)
 	movl DEST(%esp), %edx
 	movl SRC(%esp), %ecx
+#ifdef __CHKP__
+	bndldx DEST(%esp,%edx,1), %bnd0
+	bndldx SRC(%esp,%ecx,1), %bnd1
+	bndcl (%ecx), %bnd1
+	bndcu (%ecx), %bnd1
+#endif
 	testb $0xff, (%ecx)	/* Is source string empty? */
 	jz L(8)			/* yes => return */
+#ifdef __CHKP__
+	bndcl (%edx), %bnd0
+	bndcu (%edx), %bnd0
+#endif
 
 	/* Test the first bytes separately until destination is aligned.  */
 	testl $3, %edx		/* destination pointer aligned? */
@@ -66,7 +76,11 @@ ENTRY (strcat)
 
 L(4):	addl $16,%edx	/* increment destination pointer for round */
 
-L(1):	movl (%edx), %eax	/* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+	bndcu (%edx), %bnd0
+#endif
+	movl (%edx), %eax	/* get word (= 4 bytes) in question */
 	movl $0xfefefeff, %edi	/* magic value */
 
 	/* If you compare this with the algorithm in memchr.S you will
@@ -98,6 +112,9 @@ L(1):	movl (%edx), %eax	/* get word (= 4 bytes) in question */
 	/* If at least one byte of the word is C we don't get 0 in %ecx.  */
 	jnz L(3)
 
+#ifdef __CHKP__
+	bndcu 4(%edx), %bnd0
+#endif
 	movl 4(%edx), %eax	/* get word from source */
 	movl $0xfefefeff, %edi	/* magic value */
 	addl %eax, %edi		/* add the magic value to the word.  We get
@@ -110,6 +127,9 @@ L(1):	movl (%edx), %eax	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz L(5)		/* one byte is NUL => stop copying */
 
+#ifdef __CHKP__
+	bndcu 8(%edx), %bnd0
+#endif
 	movl 8(%edx), %eax	/* get word from source */
 	movl $0xfefefeff, %edi	/* magic value */
 	addl %eax, %edi		/* add the magic value to the word.  We get
@@ -122,6 +142,9 @@ L(1):	movl (%edx), %eax	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz L(6)		/* one byte is NUL => stop copying */
 
+#ifdef __CHKP__
+	bndcu 12(%edx), %bnd0
+#endif
 	movl 12(%edx), %eax	/* get word from source */
 	movl $0xfefefeff, %edi	/* magic value */
 	addl %eax, %edi		/* add the magic value to the word.  We get
@@ -155,6 +178,10 @@ L(2):	subl %ecx, %edx	/* reduce number of loop variants */
 	/* Now we have to align the source pointer.  */
 	testl $3, %ecx		/* pointer correctly aligned? */
 	jz L(29)		/* yes => start copy loop */
+#ifdef __CHKP__
+	bndcu (%ecx), %bnd1
+	bndcu (%ecx, %edx), %bnd0
+#endif
 	movb (%ecx), %al	/* get first byte */
 	movb %al, (%ecx,%edx)	/* and store it */
 	andb %al, %al		/* is byte NUL? */
@@ -163,6 +190,10 @@ L(2):	subl %ecx, %edx	/* reduce number of loop variants */
 
 	testl $3, %ecx		/* pointer correctly aligned?
*/ jz L(29) /* yes => start copy loop */ +#ifdef __CHKP__ + bndcu (%ecx), %bnd1 + bndcu (%ecx, %edx), %bnd0 +#endif movb (%ecx), %al /* get first byte */ movb %al, (%ecx,%edx) /* and store it */ andb %al, %al /* is byte NUL? */ @@ -171,6 +202,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */ testl $3, %ecx /* pointer correctly aligned? */ jz L(29) /* yes => start copy loop */ +#ifdef __CHKP__ + bndcu (%ecx), %bnd1 + bndcu (%ecx, %edx), %bnd0 +#endif movb (%ecx), %al /* get first byte */ movb %al, (%ecx,%edx) /* and store it */ andb %al, %al /* is byte NUL? */ @@ -182,10 +217,18 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */ ALIGN(4) -L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */ +L(28): +#ifdef __CHKP__ + bndcu 12(%ecx, %edx), %bnd0 +#endif + movl %eax, 12(%ecx,%edx)/* store word at destination */ addl $16, %ecx /* adjust pointer for full round */ -L(29): movl (%ecx), %eax /* get word from source */ +L(29): +#ifdef __CHKP__ + bndcu (%ecx), %bnd1 +#endif + movl (%ecx), %eax /* get word from source */ movl $0xfefefeff, %edi /* magic value */ addl %eax, %edi /* add the magic value to the word. We get carry bits reported for each byte which @@ -196,8 +239,14 @@ L(29): movl (%ecx), %eax /* get word from source */ incl %edi /* add 1: if one carry bit was *not* set the addition will not result in 0. */ jnz L(9) /* one byte is NUL => stop copying */ +#ifdef __CHKP__ + bndcu (%ecx, %edx), %bnd0 +#endif movl %eax, (%ecx,%edx) /* store word to destination */ +#ifdef __CHKP__ + bndcu 4(%ecx), %bnd1 +#endif movl 4(%ecx), %eax /* get word from source */ movl $0xfefefeff, %edi /* magic value */ addl %eax, %edi /* add the magic value to the word. We get @@ -209,8 +258,14 @@ L(29): movl (%ecx), %eax /* get word from source */ incl %edi /* add 1: if one carry bit was *not* set the addition will not result in 0. */ jnz L(91) /* one byte is NUL => stop copying */ +#ifdef __CHKP__ + bndcu 4(%ecx, %edx), %bnd0 +#endif movl %eax, 4(%ecx,%edx) /* store word to destination */ +#ifdef __CHKP__ + bndcu 8(%ecx), %bnd1 +#endif movl 8(%ecx), %eax /* get word from source */ movl $0xfefefeff, %edi /* magic value */ addl %eax, %edi /* add the magic value to the word. We get @@ -222,8 +277,14 @@ L(29): movl (%ecx), %eax /* get word from source */ incl %edi /* add 1: if one carry bit was *not* set the addition will not result in 0. */ jnz L(92) /* one byte is NUL => stop copying */ +#ifdef __CHKP__ + bndcu 8(%ecx, %edx), %bnd0 +#endif movl %eax, 8(%ecx,%edx) /* store word to destination */ +#ifdef __CHKP__ + bndcu 12(%ecx), %bnd1 +#endif movl 12(%ecx), %eax /* get word from source */ movl $0xfefefeff, %edi /* magic value */ addl %eax, %edi /* add the magic value to the word. We get @@ -240,15 +301,25 @@ L(93): addl $4, %ecx /* adjust pointer */ L(92): addl $4, %ecx L(91): addl $4, %ecx -L(9): movb %al, (%ecx,%edx) /* store first byte of last word */ +L(9): +#ifdef __CHKP__ + bndcu (%ecx, %edx), %bnd0 +#endif + movb %al, (%ecx,%edx) /* store first byte of last word */ orb %al, %al /* is it NUL? */ jz L(8) /* yes => return */ +#ifdef __CHKP__ + bndcu 1(%ecx, %edx), %bnd0 +#endif movb %ah, 1(%ecx,%edx) /* store second byte of last word */ orb %ah, %ah /* is it NUL? */ jz L(8) /* yes => return */ shrl $16, %eax /* make upper bytes accessible */ +#ifdef __CHKP__ + bndcu 2(%ecx, %edx), %bnd0 +#endif movb %al, 2(%ecx,%edx) /* store third byte of last word */ orb %al, %al /* is it NUL? 
*/ jz L(8) /* yes => return */ diff --git a/sysdeps/i386/i586/strchr.S b/sysdeps/i386/i586/strchr.S index 648d52830d..4efa935771 100644 --- a/sysdeps/i386/i586/strchr.S +++ b/sysdeps/i386/i586/strchr.S @@ -54,6 +54,10 @@ ENTRY (strchr) movl STR(%esp), %eax movl CHR(%esp), %edx +#ifdef __CHKP__ + bndldx STR(%esp,%eax,1), %bnd0 + bndcl (%eax), %bnd0 +#endif movl %eax, %edi /* duplicate string pointer for later */ cfi_rel_offset (edi, 12) @@ -83,6 +87,9 @@ ENTRY (strchr) xorb %dl, %cl /* load single byte and test for NUL */ je L(3) /* yes => return NULL */ +#ifdef __CHKP__ + bndcu 1(%eax), %bnd0 +#endif movb 1(%eax), %cl /* load single byte */ incl %eax @@ -97,7 +104,11 @@ ENTRY (strchr) jne L(11) -L(0): movb (%eax), %cl /* load single byte */ +L(0): +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif + movb (%eax), %cl /* load single byte */ cmpb %cl, %dl /* is byte == C? */ je L(out) /* aligned => return pointer */ @@ -115,7 +126,11 @@ L(0): movb (%eax), %cl /* load single byte */ four instruction up to `L1' will not be executed in the loop because the same code is found at the end of the loop, but there it is executed in parallel with other instructions. */ -L(11): movl (%eax), %ecx +L(11): +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif + movl (%eax), %ecx movl $magic, %ebp movl $magic, %edi @@ -159,6 +174,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */ movl $magic, %esi /* load magic value */ xorl %edx, %ebx /* clear words which are C */ +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif movl (%eax), %ecx addl %ebx, %esi /* (word+magic) */ @@ -189,6 +207,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */ movl $magic, %esi xorl %edx, %ebx +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif movl (%eax), %ecx addl %ebx, %esi @@ -219,6 +240,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */ movl $magic, %esi xorl %edx, %ebx +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif movl (%eax), %ecx addl %ebx, %esi @@ -249,6 +273,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */ movl $magic, %esi xorl %edx, %ebx +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif movl (%eax), %ecx addl %ebx, %esi diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S index c940369342..6392a8e3d4 100644 --- a/sysdeps/i386/i586/strcpy.S +++ b/sysdeps/i386/i586/strcpy.S @@ -45,6 +45,10 @@ ENTRY (STRCPY) cfi_rel_offset (edi, 8) movl SRC(%esp), %esi cfi_rel_offset (esi, 4) +#ifdef __CHKP__ + bndldx DEST(%esp,%edi,1), %bnd0 + bndldx SRC(%esp,%esi,1), %bnd1 +#endif xorl %eax, %eax leal -1(%esi), %ecx @@ -61,6 +65,9 @@ ENTRY (STRCPY) /* 0xb is the distance between 2: and 1: but we avoid writing 1f-2b because the assembler generates worse code. */ leal 0xb(%edx,%ecx,8), %ecx +# ifdef __CHKP__ + jmp L(1) +# endif #else leal 1f(,%ecx,8), %ecx #endif diff --git a/sysdeps/i386/i586/strlen.S b/sysdeps/i386/i586/strlen.S index b50fffa1fb..9034625e95 100644 --- a/sysdeps/i386/i586/strlen.S +++ b/sysdeps/i386/i586/strlen.S @@ -41,6 +41,10 @@ ENTRY (strlen) movl STR(%esp), %eax +#ifdef __CHKP__ + bndldx STR(%esp,%eax,1), %bnd0 + bndcu (%eax),%bnd0 +#endif movl $3, %edx /* load mask (= 3) */ andl %eax, %edx /* separate last two bits of address */ @@ -48,10 +52,16 @@ ENTRY (strlen) jz L(1) /* aligned => start loop */ jp L(0) /* exactly two bits set */ +#ifdef __CHKP__ + bndcu (%eax),%bnd0 +#endif cmpb %dh, (%eax) /* is byte NUL? */ je L(2) /* yes => return */ incl %eax /* increment pointer */ +#ifdef __CHKP__ + bndcu (%eax),%bnd0 +#endif cmpb %dh, (%eax) /* is byte NUL? 
*/ je L(2) /* yes => return */ @@ -61,7 +71,11 @@ ENTRY (strlen) jz L(1) -L(0): cmpb %dh, (%eax) /* is byte NUL? */ +L(0): +#ifdef __CHKP__ + bndcu (%eax),%bnd0 +#endif + cmpb %dh, (%eax) /* is byte NUL? */ je L(2) /* yes => return */ incl %eax /* increment pointer */ @@ -174,7 +188,11 @@ L(3): subl $4, %eax /* correct too early pointer increment */ incl %eax /* increment pointer */ -L(2): subl STR(%esp), %eax /* now compute the length as difference +L(2): +#ifdef __CHKP__ + bndcu (%eax),%bnd0 +#endif + subl STR(%esp), %eax /* now compute the length as difference between start and terminating NUL character */ ret diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S index b8091a60ec..6cb03e7a7b 100644 --- a/sysdeps/i386/i686/memcmp.S +++ b/sysdeps/i386/i686/memcmp.S @@ -48,9 +48,19 @@ ENTRY (memcmp) movl BLK1(%esp), %eax movl BLK2(%esp), %edx movl LEN(%esp), %ecx +#ifdef __CHKP__ + bndldx BLK1(%esp,%eax,1), %bnd0 + bndldx BLK2(%esp,%edx,1), %bnd1 +#endif cmpl $1, %ecx jne L(not_1) +#ifdef __CHKP__ + bndcl (%eax), %bnd0 + bndcu (%eax), %bnd0 + bndcl (%edx), %bnd1 + bndcu (%edx), %bnd1 +#endif movzbl (%eax), %ecx /* LEN == 1 */ cmpb (%edx), %cl jne L(neq) @@ -69,6 +79,12 @@ L(neq): cfi_rel_offset (ebx, 0) L(not_1): jl L(bye) /* LEN == 0 */ +#ifdef __CHKP__ + bndcl (%eax), %bnd0 + bndcu (%eax), %bnd0 + bndcl (%edx), %bnd1 + bndcu (%edx), %bnd1 +#endif pushl %esi cfi_adjust_cfa_offset (4) @@ -84,36 +100,64 @@ L(not_1): ALIGN (4) L(28bytes): +#ifdef __CHKP__ + bndcu -28(%esi), %bnd0 + bndcu -28(%edx), %bnd1 +#endif movl -28(%esi), %eax movl -28(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(24bytes): +#ifdef __CHKP__ + bndcu -24(%esi), %bnd0 + bndcu -24(%edx), %bnd1 +#endif movl -24(%esi), %eax movl -24(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(20bytes): +#ifdef __CHKP__ + bndcu -20(%esi), %bnd0 + bndcu -20(%edx), %bnd1 +#endif movl -20(%esi), %eax movl -20(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(16bytes): +#ifdef __CHKP__ + bndcu -16(%esi), %bnd0 + bndcu -16(%edx), %bnd1 +#endif movl -16(%esi), %eax movl -16(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(12bytes): +#ifdef __CHKP__ + bndcu -12(%esi), %bnd0 + bndcu -12(%edx), %bnd1 +#endif movl -12(%esi), %eax movl -12(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(8bytes): +#ifdef __CHKP__ + bndcu -8(%esi), %bnd0 + bndcu -8(%edx), %bnd1 +#endif movl -8(%esi), %eax movl -8(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(4bytes): +#ifdef __CHKP__ + bndcu -4(%esi), %bnd0 + bndcu -4(%edx), %bnd1 +#endif movl -4(%esi), %eax movl -4(%edx), %ecx cmpl %ecx, %eax @@ -129,41 +173,73 @@ L(0bytes): cfi_rel_offset (esi, 0) cfi_rel_offset (ebx, 4) L(29bytes): +#ifdef __CHKP__ + bndcu -29(%esi), %bnd0 + bndcu -29(%edx), %bnd1 +#endif movl -29(%esi), %eax movl -29(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(25bytes): +#ifdef __CHKP__ + bndcu -25(%esi), %bnd0 + bndcu -25(%edx), %bnd1 +#endif movl -25(%esi), %eax movl -25(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(21bytes): +#ifdef __CHKP__ + bndcu -21(%esi), %bnd0 + bndcu -21(%edx), %bnd1 +#endif movl -21(%esi), %eax movl -21(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(17bytes): +#ifdef __CHKP__ + bndcu -17(%esi), %bnd0 + bndcu -17(%edx), %bnd1 +#endif movl -17(%esi), %eax movl -17(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(13bytes): +#ifdef __CHKP__ + bndcu -13(%esi), %bnd0 + bndcu -13(%edx), %bnd1 +#endif movl -13(%esi), %eax movl -13(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(9bytes): +#ifdef __CHKP__ + bndcu -9(%esi), %bnd0 + bndcu -9(%edx), %bnd1 +#endif movl 
-9(%esi), %eax movl -9(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(5bytes): +#ifdef __CHKP__ + bndcu -5(%esi), %bnd0 + bndcu -5(%edx), %bnd1 +#endif movl -5(%esi), %eax movl -5(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(1bytes): +#ifdef __CHKP__ + bndcu -1(%esi), %bnd0 + bndcu -1(%edx), %bnd1 +#endif movzbl -1(%esi), %eax cmpb -1(%edx), %al jne L(set) @@ -177,41 +253,73 @@ L(1bytes): cfi_rel_offset (esi, 0) cfi_rel_offset (ebx, 4) L(30bytes): +#ifdef __CHKP__ + bndcu -30(%esi), %bnd0 + bndcu -30(%edx), %bnd1 +#endif movl -30(%esi), %eax movl -30(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(26bytes): +#ifdef __CHKP__ + bndcu -26(%esi), %bnd0 + bndcu -26(%edx), %bnd1 +#endif movl -26(%esi), %eax movl -26(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(22bytes): +#ifdef __CHKP__ + bndcu -22(%esi), %bnd0 + bndcu -22(%edx), %bnd1 +#endif movl -22(%esi), %eax movl -22(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(18bytes): +#ifdef __CHKP__ + bndcu -18(%esi), %bnd0 + bndcu -18(%edx), %bnd1 +#endif movl -18(%esi), %eax movl -18(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(14bytes): +#ifdef __CHKP__ + bndcu -14(%esi), %bnd0 + bndcu -14(%edx), %bnd1 +#endif movl -14(%esi), %eax movl -14(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(10bytes): +#ifdef __CHKP__ + bndcu -10(%esi), %bnd0 + bndcu -10(%edx), %bnd1 +#endif movl -10(%esi), %eax movl -10(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(6bytes): +#ifdef __CHKP__ + bndcu -6(%esi), %bnd0 + bndcu -6(%edx), %bnd1 +#endif movl -6(%esi), %eax movl -6(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(2bytes): +#ifdef __CHKP__ + bndcu -2(%esi), %bnd0 + bndcu -2(%edx), %bnd1 +#endif movzwl -2(%esi), %eax movzwl -2(%edx), %ecx cmpb %cl, %al @@ -228,41 +336,73 @@ L(2bytes): cfi_rel_offset (esi, 0) cfi_rel_offset (ebx, 4) L(31bytes): +#ifdef __CHKP__ + bndcu -31(%esi), %bnd0 + bndcu -31(%edx), %bnd1 +#endif movl -31(%esi), %eax movl -31(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(27bytes): +#ifdef __CHKP__ + bndcu -27(%esi), %bnd0 + bndcu -27(%edx), %bnd1 +#endif movl -27(%esi), %eax movl -27(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(23bytes): +#ifdef __CHKP__ + bndcu -23(%esi), %bnd0 + bndcu -23(%edx), %bnd1 +#endif movl -23(%esi), %eax movl -23(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(19bytes): +#ifdef __CHKP__ + bndcu -19(%esi), %bnd0 + bndcu -19(%edx), %bnd1 +#endif movl -19(%esi), %eax movl -19(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(15bytes): +#ifdef __CHKP__ + bndcu -15(%esi), %bnd0 + bndcu -15(%edx), %bnd1 +#endif movl -15(%esi), %eax movl -15(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(11bytes): +#ifdef __CHKP__ + bndcu -11(%esi), %bnd0 + bndcu -11(%edx), %bnd1 +#endif movl -11(%esi), %eax movl -11(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(7bytes): +#ifdef __CHKP__ + bndcu -7(%esi), %bnd0 + bndcu -7(%edx), %bnd1 +#endif movl -7(%esi), %eax movl -7(%edx), %ecx cmpl %ecx, %eax jne L(find_diff) L(3bytes): +#ifdef __CHKP__ + bndcu -3(%esi), %bnd0 + bndcu -3(%edx), %bnd1 +#endif movzwl -3(%esi), %eax movzwl -3(%edx), %ecx cmpb %cl, %al @@ -286,34 +426,66 @@ L(3bytes): L(32bytesormore): subl $32, %ecx +#ifdef __CHKP__ + bndcu (%esi), %bnd0 + bndcu (%edx), %bnd1 +#endif movl (%esi), %eax cmpl (%edx), %eax jne L(load_ecx) +#ifdef __CHKP__ + bndcu 4(%esi), %bnd0 + bndcu 4(%edx), %bnd1 +#endif movl 4(%esi), %eax cmpl 4(%edx), %eax jne L(load_ecx_4) +#ifdef __CHKP__ + bndcu 8(%esi), %bnd0 + bndcu 8(%edx), %bnd1 +#endif movl 8(%esi), %eax cmpl 8(%edx), %eax jne L(load_ecx_8) +#ifdef __CHKP__ + bndcu 12(%esi), %bnd0 + bndcu 
12(%edx), %bnd1 +#endif movl 12(%esi), %eax cmpl 12(%edx), %eax jne L(load_ecx_12) +#ifdef __CHKP__ + bndcu 16(%esi), %bnd0 + bndcu 16(%edx), %bnd1 +#endif movl 16(%esi), %eax cmpl 16(%edx), %eax jne L(load_ecx_16) +#ifdef __CHKP__ + bndcu 20(%esi), %bnd0 + bndcu 20(%edx), %bnd1 +#endif movl 20(%esi), %eax cmpl 20(%edx), %eax jne L(load_ecx_20) +#ifdef __CHKP__ + bndcu 24(%esi), %bnd0 + bndcu 24(%edx), %bnd1 +#endif movl 24(%esi), %eax cmpl 24(%edx), %eax jne L(load_ecx_24) +#ifdef __CHKP__ + bndcu 28(%esi), %bnd0 + bndcu 28(%edx), %bnd1 +#endif movl 28(%esi), %eax cmpl 28(%edx), %eax jne L(load_ecx_28) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index aed79a8aa9..3fd4370b02 100644 --- a/sysdeps/i386/i686/memset.S +++ b/sysdeps/i386/i686/memset.S @@ -50,6 +50,11 @@ ENTRY (memset) cfi_adjust_cfa_offset (4) movl DEST(%esp), %edx movl LEN(%esp), %ecx +#ifdef __CHKP__ + bndldx DEST(%esp,%edx,1),%bnd0 + bndcl (%edx), %bnd0 + bndcu -1(%edx, %ecx), %bnd0 +#endif #if BZERO_P xorl %eax, %eax /* fill with 0 */ #else diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 8946bfa586..7a4999a808 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -6,9 +6,7 @@ endif ifeq ($(subdir),string) gen-as-const-headers += locale-defines.sym -sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ - memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ - memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ +sysdep_routines += bzero-sse2 memset-sse2 \ memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \ strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \ memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \ @@ -23,7 +21,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ strnlen-sse2 strnlen-c \ strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \ strncase_l-c strncase-c strncase_l-ssse3 \ - strcasecmp_l-sse4 strncase_l-sse4 + strcasecmp_l-sse4 strncase_l-sse4 mpx_memcpy_nobnd \ + mpx_mempcpy_nobnd mpx_memmove_nobnd ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c CFLAGS-varshift.c += -msse4 diff --git a/sysdeps/i386/i686/multiarch/Versions b/sysdeps/i386/i686/multiarch/Versions index 59b185ac8d..7f0cbbcb27 100644 --- a/sysdeps/i386/i686/multiarch/Versions +++ b/sysdeps/i386/i686/multiarch/Versions @@ -2,4 +2,11 @@ libc { GLIBC_PRIVATE { __get_cpu_features; } +%ifdef __CHKP__ + GLIBC_2.14 { + mpx_memcpy_nobnd; + mpx_memmove_nobnd; + mpx_mempcpy_nobnd; + } +%endif } diff --git a/sysdeps/i386/i686/multiarch/__bcopy.S b/sysdeps/i386/i686/multiarch/__bcopy.S new file mode 100644 index 0000000000..f8e40aff94 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/__bcopy.S @@ -0,0 +1,81 @@ +/* Multiple versions of bcopy + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2010-2013 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib.  */
+#ifndef NOT_IN_libc
+# ifdef SHARED
+	.text
+ENTRY(bcopy)
+	.type bcopy, @gnu_indirect_function
+	pushl %ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	LOAD_PIC_REG(bx)
+	cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne 1f
+	call __init_cpu_features
+1:	leal __bcopy_ia32@GOTOFF(%ebx), %eax
+	testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __bcopy_ssse3@GOTOFF(%ebx), %eax
+	testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __bcopy_ssse3_rep@GOTOFF(%ebx), %eax
+2:	popl %ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	ret
+END(bcopy)
+# else
+	.text
+ENTRY(bcopy)
+	.type bcopy, @gnu_indirect_function
+	cmpl $0, KIND_OFFSET+__cpu_features
+	jne 1f
+	call __init_cpu_features
+1:	leal __bcopy_ia32, %eax
+	testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+	jz 2f
+	leal __bcopy_ssse3, %eax
+	testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
+	jz 2f
+	leal __bcopy_ssse3_rep, %eax
+2:	ret
+END(bcopy)
+# endif
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type __bcopy_ia32, @function; \
+	.p2align 4; \
+	.globl __bcopy_ia32; \
+	.hidden __bcopy_ia32; \
+	__bcopy_ia32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32
+
+#endif
+
+#include "../bcopy.S"
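Each of the new __*.S wrappers keeps an IFUNC resolver that picks an
implementation once, at symbol-resolution time.  In C, the dispatch in
__bcopy.S amounts to the sketch below; has_ssse3() and
has_fast_rep_string() stand in for the bit tests against
__cpu_features and are not real glibc interfaces.

  #include <stddef.h>

  extern void __bcopy_ia32 (const void *, void *, size_t);
  extern void __bcopy_ssse3 (const void *, void *, size_t);
  extern void __bcopy_ssse3_rep (const void *, void *, size_t);

  extern int has_ssse3 (void);           /* ~ testl $bit_SSSE3, ... */
  extern int has_fast_rep_string (void); /* ~ testl $bit_Fast_Rep_String, ... */

  typedef void (*bcopy_fn) (const void *, void *, size_t);

  static bcopy_fn bcopy_resolver (void)
  {
    bcopy_fn fn = __bcopy_ia32;     /* default: leal __bcopy_ia32, %eax */
    if (has_ssse3 ())
      {
        fn = __bcopy_ssse3;
        if (has_fast_rep_string ())
          fn = __bcopy_ssse3_rep;
      }
    return fn;
  }

  /* GCC's ifunc attribute expresses the same dispatch declaratively.  */
  void bcopy (const void *, void *, size_t)
       __attribute__ ((ifunc ("bcopy_resolver")));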
diff --git a/sysdeps/i386/i686/multiarch/__memcpy.S b/sysdeps/i386/i686/multiarch/__memcpy.S
new file mode 100644
index 0000000000..9a6cd75669
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/__memcpy.S
@@ -0,0 +1,82 @@
+/* Multiple versions of memcpy
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2010-2013 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib and for
+   DSO.  In static binaries we need memcpy before the initialization
+   happened.  */
+#if defined SHARED && !defined NOT_IN_libc
+	.text
+ENTRY(memcpy)
+	.type memcpy, @gnu_indirect_function
+	pushl %ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	LOAD_PIC_REG(bx)
+	cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne 1f
+	call __init_cpu_features
+1:	leal __memcpy_ia32@GOTOFF(%ebx), %eax
+	testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __memcpy_ssse3@GOTOFF(%ebx), %eax
+	testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __memcpy_ssse3_rep@GOTOFF(%ebx), %eax
+2:	popl %ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	ret
+END(memcpy)
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type __memcpy_ia32, @function; \
+	.p2align 4; \
+	.globl __memcpy_ia32; \
+	.hidden __memcpy_ia32; \
+	__memcpy_ia32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+	.type __memcpy_chk_ia32, @function; \
+	.globl __memcpy_chk_ia32; \
+	.p2align 4; \
+	__memcpy_chk_ia32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+	cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32
+
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+   they will be called without setting up EBX needed for PLT which is
+   used by IFUNC.  */
+# define libc_hidden_builtin_def(name) \
+	.globl __GI_memcpy; __GI_memcpy = __memcpy_ia32
+#endif
+
+#include "../memcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/__memcpy_chk.S b/sysdeps/i386/i686/multiarch/__memcpy_chk.S
new file mode 100644
index 0000000000..f66ecfe16b
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/__memcpy_chk.S
@@ -0,0 +1,54 @@
+/* Multiple versions of __memcpy_chk
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2010-2013 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib and for
+   DSO.  There are no multiarch memcpy functions for static binaries.
+ */ +#ifndef NOT_IN_libc +# ifdef SHARED + .text +ENTRY(__memcpy_chk) + .type __memcpy_chk, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + LOAD_PIC_REG(bx) + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __memcpy_chk_ia32@GOTOFF(%ebx), %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memcpy_chk_ssse3@GOTOFF(%ebx), %eax + testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(__memcpy_chk) +# else +# include "../memcpy_chk.S" +# endif +#endif diff --git a/sysdeps/i386/i686/multiarch/__memmove.S b/sysdeps/i386/i686/multiarch/__memmove.S new file mode 100644 index 0000000000..2d20dd9409 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/__memmove.S @@ -0,0 +1,109 @@ +/* Multiple versions of memmove + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2010-2013 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +/* Define multiple versions only for the definition in lib. 
*/ +#ifndef NOT_IN_libc +# ifdef SHARED + .text +ENTRY(memmove) + .type memmove, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + LOAD_PIC_REG(bx) + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __memmove_ia32@GOTOFF(%ebx), %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memmove_ssse3@GOTOFF(%ebx), %eax + testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memmove_ssse3_rep@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(memmove) + +# undef ENTRY +# define ENTRY(name) \ + .type __memmove_ia32, @function; \ + .p2align 4; \ + .globl __memmove_ia32; \ + .hidden __memmove_ia32; \ + __memmove_ia32: cfi_startproc; \ + CALL_MCOUNT +# else + .text +ENTRY(memmove) + .type memmove, @gnu_indirect_function + cmpl $0, KIND_OFFSET+__cpu_features + jne 1f + call __init_cpu_features +1: leal __memmove_ia32, %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features + jz 2f + leal __memmove_ssse3, %eax + testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features + jz 2f + leal __memmove_ssse3_rep, %eax +2: ret +END(memmove) + +# undef ENTRY +# define ENTRY(name) \ + .type __memmove_ia32, @function; \ + .globl __memmove_ia32; \ + .p2align 4; \ + __memmove_ia32: cfi_startproc; \ + CALL_MCOUNT +# endif + +# undef END +# define END(name) \ + cfi_endproc; .size __memmove_ia32, .-__memmove_ia32 + +# undef ENTRY_CHK +# define ENTRY_CHK(name) \ + .type __memmove_chk_ia32, @function; \ + .globl __memmove_chk_ia32; \ + .p2align 4; \ + __memmove_chk_ia32: cfi_startproc; \ + CALL_MCOUNT +# undef END_CHK +# define END_CHK(name) \ + cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32 + +# ifdef SHARED +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in shared library since + they will be called without setting up EBX needed for PLT which is + used by IFUNC. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memmove; __GI_memmove = __memmove_ia32 +# endif +#endif + +#include "../memmove.S" diff --git a/sysdeps/i386/i686/multiarch/__memmove_chk.S b/sysdeps/i386/i686/multiarch/__memmove_chk.S new file mode 100644 index 0000000000..9552640d52 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/__memmove_chk.S @@ -0,0 +1,102 @@ +/* Multiple versions of __memmove_chk + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2010-2013 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +/* Define multiple versions only for the definition in lib. 
*/
+#ifndef NOT_IN_libc
+# ifdef SHARED
+	.text
+ENTRY(__memmove_chk)
+	.type __memmove_chk, @gnu_indirect_function
+	pushl %ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	LOAD_PIC_REG(bx)
+	cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne 1f
+	call __init_cpu_features
+1:	leal __memmove_chk_ia32@GOTOFF(%ebx), %eax
+	testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __memmove_chk_ssse3@GOTOFF(%ebx), %eax
+	testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __memmove_chk_ssse3_rep@GOTOFF(%ebx), %eax
+2:	popl %ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	ret
+END(__memmove_chk)
+# else
+	.text
+ENTRY(__memmove_chk)
+	.type __memmove_chk, @gnu_indirect_function
+	cmpl $0, KIND_OFFSET+__cpu_features
+	jne 1f
+	call __init_cpu_features
+1:	leal __memmove_chk_ia32, %eax
+	testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
+	jz 2f
+	leal __memmove_chk_ssse3, %eax
+	testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features
+	jz 2f
+	leal __memmove_chk_ssse3_rep, %eax
+2:	ret
+END(__memmove_chk)
+
+	.type __memmove_chk_ssse3, @function
+	.p2align 4;
+__memmove_chk_ssse3:
+	cfi_startproc
+	CALL_MCOUNT
+	movl 12(%esp), %eax
+	cmpl %eax, 16(%esp)
+	jb __chk_fail
+	jmp __memmove_ssse3
+	cfi_endproc
+	.size __memmove_chk_ssse3, .-__memmove_chk_ssse3
+
+	.type __memmove_chk_ssse3_rep, @function
+	.p2align 4;
+__memmove_chk_ssse3_rep:
+	cfi_startproc
+	CALL_MCOUNT
+	movl 12(%esp), %eax
+	cmpl %eax, 16(%esp)
+	jb __chk_fail
+	jmp __memmove_ssse3_rep
+	cfi_endproc
+	.size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep
+
+	.type __memmove_chk_ia32, @function
+	.p2align 4;
+__memmove_chk_ia32:
+	cfi_startproc
+	CALL_MCOUNT
+	movl 12(%esp), %eax
+	cmpl %eax, 16(%esp)
+	jb __chk_fail
+	jmp __memmove_ia32
+	cfi_endproc
+	.size __memmove_chk_ia32, .-__memmove_chk_ia32
+# endif
+#endif
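The three stubs just above show the whole contract of the _chk entry
points: the fourth argument is the true size of the destination
object, and the copy must abort rather than overflow it.  In C,
__memmove_chk_ssse3 is equivalent to this sketch (__chk_fail and
__memmove_ssse3 are the symbols the stub references):

  #include <stddef.h>

  extern void *__memmove_ssse3 (void *, const void *, size_t);
  extern void __chk_fail (void) __attribute__ ((noreturn));

  void *
  memmove_chk_sketch (void *dst, const void *src, size_t len,
                      size_t dstlen)
  {
    /* movl 12(%esp), %eax; cmpl %eax, 16(%esp); jb __chk_fail */
    if (dstlen < len)
      __chk_fail ();
    return __memmove_ssse3 (dst, src, len);  /* jmp __memmove_ssse3 */
  }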
diff --git a/sysdeps/i386/i686/multiarch/__mempcpy.S b/sysdeps/i386/i686/multiarch/__mempcpy.S
new file mode 100644
index 0000000000..83bd1f2075
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/__mempcpy.S
@@ -0,0 +1,85 @@
+/* Multiple versions of mempcpy
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2010-2013 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib and for
+   DSO.  In static binaries we need mempcpy before the initialization
+   happened.  */
+#if defined SHARED && !defined NOT_IN_libc
+	.text
+ENTRY(__mempcpy)
+	.type __mempcpy, @gnu_indirect_function
+	pushl %ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	LOAD_PIC_REG(bx)
+	cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne 1f
+	call __init_cpu_features
+1:	leal __mempcpy_ia32@GOTOFF(%ebx), %eax
+	testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __mempcpy_ssse3@GOTOFF(%ebx), %eax
+	testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx)
+	jz 2f
+	leal __mempcpy_ssse3_rep@GOTOFF(%ebx), %eax
+2:	popl %ebx
+	cfi_adjust_cfa_offset (-4)
+	cfi_restore (ebx)
+	ret
+END(__mempcpy)
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type __mempcpy_ia32, @function; \
+	.p2align 4; \
+	.globl __mempcpy_ia32; \
+	.hidden __mempcpy_ia32; \
+	__mempcpy_ia32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32
+
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
+	.type __mempcpy_chk_ia32, @function; \
+	.globl __mempcpy_chk_ia32; \
+	.p2align 4; \
+	__mempcpy_chk_ia32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END_CHK
+# define END_CHK(name) \
+	cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32
+
+# undef libc_hidden_def
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+   they will be called without setting up EBX needed for PLT which is
+   used by IFUNC.  */
+# define libc_hidden_def(name) \
+	.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32
+# define libc_hidden_builtin_def(name) \
+	.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32
+#endif
+
+#include "../mempcpy.S"
diff --git a/sysdeps/i386/i686/multiarch/__mempcpy_chk.S b/sysdeps/i386/i686/multiarch/__mempcpy_chk.S
new file mode 100644
index 0000000000..7bd4eb1406
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/__mempcpy_chk.S
@@ -0,0 +1,54 @@
+/* Multiple versions of __mempcpy_chk
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2010-2013 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+/* Define multiple versions only for the definition in lib and for
+   DSO.  There are no multiarch mempcpy functions for static binaries.
+ */ +#ifndef NOT_IN_libc +# ifdef SHARED + .text +ENTRY(__mempcpy_chk) + .type __mempcpy_chk, @gnu_indirect_function + pushl %ebx + cfi_adjust_cfa_offset (4) + cfi_rel_offset (ebx, 0) + LOAD_PIC_REG(bx) + cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) + jne 1f + call __init_cpu_features +1: leal __mempcpy_chk_ia32@GOTOFF(%ebx), %eax + testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __mempcpy_chk_ssse3@GOTOFF(%ebx), %eax + testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __mempcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax +2: popl %ebx + cfi_adjust_cfa_offset (-4) + cfi_restore (ebx) + ret +END(__mempcpy_chk) +# else +# include "../mempcpy_chk.S" +# endif +#endif diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S deleted file mode 100644 index f8e40aff94..0000000000 --- a/sysdeps/i386/i686/multiarch/bcopy.S +++ /dev/null @@ -1,81 +0,0 @@ -/* Multiple versions of bcopy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -/* Define multiple versions only for the definition in lib. 
*/ -#ifndef NOT_IN_libc -# ifdef SHARED - .text -ENTRY(bcopy) - .type bcopy, @gnu_indirect_function - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG(bx) - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) - jne 1f - call __init_cpu_features -1: leal __bcopy_ia32@GOTOFF(%ebx), %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __bcopy_ssse3@GOTOFF(%ebx), %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __bcopy_ssse3_rep@GOTOFF(%ebx), %eax -2: popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - ret -END(bcopy) -# else - .text -ENTRY(bcopy) - .type bcopy, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features - jne 1f - call __init_cpu_features -1: leal __bcopy_ia32, %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features - jz 2f - leal __bcopy_ssse3, %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features - jz 2f - leal __bcopy_ssse3_rep, %eax -2: ret -END(bcopy) -# endif - -# undef ENTRY -# define ENTRY(name) \ - .type __bcopy_ia32, @function; \ - .p2align 4; \ - .globl __bcopy_ia32; \ - .hidden __bcopy_ia32; \ - __bcopy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __bcopy_ia32, .-__bcopy_ia32 - -#endif - -#include "../bcopy.S" diff --git a/sysdeps/i386/i686/multiarch/bcopy.c b/sysdeps/i386/i686/multiarch/bcopy.c new file mode 100644 index 0000000000..6f5efba461 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/bcopy.c @@ -0,0 +1,7 @@ +#include + +void +bcopy (const void *src, void *dst, size_t n) +{ + memmove (dst, src, n); +} diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c index 2c282bdb72..63f0704912 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c @@ -37,11 +37,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, size_t i = 0; /* Support sysdeps/i386/i686/multiarch/bcopy.S. */ - IFUNC_IMPL (i, name, bcopy, - IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, - __bcopy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3) - IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32)) +// IFUNC_IMPL (i, name, bcopy, +// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, +// __bcopy_ssse3_rep) +// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3) +// IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32)) /* Support sysdeps/i386/i686/multiarch/bzero.S. */ IFUNC_IMPL (i, name, bzero, @@ -64,21 +64,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32)) /* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */ - IFUNC_IMPL (i, name, __memmove_chk, - IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, - __memmove_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, - __memmove_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, - __memmove_chk_ia32)) +// IFUNC_IMPL (i, name, __memmove_chk, +// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, +// __memmove_chk_ssse3_rep) +// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, +// __memmove_chk_ssse3) +// IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, +// __memmove_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/memmove.S. 
*/ - IFUNC_IMPL (i, name, memmove, - IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, - __memmove_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, - __memmove_ssse3) - IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32)) +// IFUNC_IMPL (i, name, memmove, +// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, +// __memmove_ssse3_rep) +// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, +// __memmove_ssse3) +// IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32)) /* Support sysdeps/i386/i686/multiarch/memrchr.S. */ IFUNC_IMPL (i, name, memrchr, @@ -274,37 +274,37 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, #ifdef SHARED /* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */ - IFUNC_IMPL (i, name, __memcpy_chk, - IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, - __memcpy_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, - __memcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, - __memcpy_chk_ia32)) +// IFUNC_IMPL (i, name, __memcpy_chk, +// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, +// __memcpy_chk_ssse3_rep) +// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, +// __memcpy_chk_ssse3) +// IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, +// __memcpy_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/memcpy.S. */ - IFUNC_IMPL (i, name, memcpy, - IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, - __memcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3) - IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32)) +// IFUNC_IMPL (i, name, memcpy, +// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, +// __memcpy_ssse3_rep) +// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3) +// IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */ - IFUNC_IMPL (i, name, __mempcpy_chk, - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, - __mempcpy_chk_ssse3_rep) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, - __mempcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, - __mempcpy_chk_ia32)) +// IFUNC_IMPL (i, name, __mempcpy_chk, +// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, +// __mempcpy_chk_ssse3_rep) +// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, +// __mempcpy_chk_ssse3) +// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, +// __mempcpy_chk_ia32)) /* Support sysdeps/i386/i686/multiarch/mempcpy.S. */ - IFUNC_IMPL (i, name, mempcpy, - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, - __mempcpy_ssse3_rep) - IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, - __mempcpy_ssse3) - IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32)) +// IFUNC_IMPL (i, name, mempcpy, +// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, +// __mempcpy_ssse3_rep) +// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, +// __mempcpy_ssse3) +// IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/strlen.S. 
*/ IFUNC_IMPL (i, name, strlen, diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S index d3641778f3..80be0d9371 100644 --- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S +++ b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S @@ -58,6 +58,12 @@ ENTRY (MEMCHR) # endif mov %ecx, %eax +# ifdef __CHKP__ + bndldx STR1(%esp,%ecx,1), %bnd0 + bndcl (%ecx), %bnd0 + bndcu (%ecx), %bnd0 +# endif + punpcklbw %xmm1, %xmm1 punpcklbw %xmm1, %xmm1 @@ -79,9 +85,18 @@ ENTRY (MEMCHR) # ifndef USE_AS_RAWMEMCHR sub %ecx, %edx jbe L(return_null_1) -# endif add %ecx, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif + ret +# else + add %ecx, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif ret +# endif .p2align 4 L(unaligned_no_match_1): @@ -163,8 +178,15 @@ L(loop_prolog): # ifndef USE_AS_RAWMEMCHR sub $64, %edx jbe L(exit_loop) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 # else + +# ifdef __CHKP__ + bndcu (%edx), %bnd0 +# endif movdqa (%edx), %xmm0 # endif pcmpeqb %xmm1, %xmm0 @@ -173,8 +195,15 @@ L(loop_prolog): jnz L(matches) # ifndef USE_AS_RAWMEMCHR +# ifdef __CHKP__ + bndcu 16(%edi), %bnd0 +# endif movdqa 16(%edi), %xmm2 # else + +# ifdef __CHKP__ + bndcu 16(%edx), %bnd0 +# endif movdqa 16(%edx), %xmm2 # endif pcmpeqb %xmm1, %xmm2 @@ -183,8 +212,15 @@ L(loop_prolog): jnz L(matches16) # ifndef USE_AS_RAWMEMCHR +# ifdef __CHKP__ + bndcu 32(%edi), %bnd0 +# endif movdqa 32(%edi), %xmm3 # else + +# ifdef __CHKP__ + bndcu 32(%edx), %bnd0 +# endif movdqa 32(%edx), %xmm3 # endif pcmpeqb %xmm1, %xmm3 @@ -193,8 +229,15 @@ L(loop_prolog): jnz L(matches32) # ifndef USE_AS_RAWMEMCHR +# ifdef __CHKP__ + bndcu 48(%edi), %bnd0 +# endif movdqa 48(%edi), %xmm4 # else + +# ifdef __CHKP__ + bndcu 48(%edx), %bnd0 +# endif movdqa 48(%edx), %xmm4 # endif pcmpeqb %xmm1, %xmm4 @@ -277,11 +320,18 @@ L(align64_loop): # ifndef USE_AS_RAWMEMCHR sub $64, %edx jbe L(exit_loop) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 movdqa 16(%edi), %xmm2 movdqa 32(%edi), %xmm3 movdqa 48(%edi), %xmm4 # else + +# ifdef __CHKP__ + bndcu (%edx), %bnd0 +# endif movdqa (%edx), %xmm0 movdqa 16(%edx), %xmm2 movdqa 32(%edx), %xmm3 @@ -342,9 +392,15 @@ L(align64_loop): # ifndef USE_AS_RAWMEMCHR lea 48(%edi, %eax), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif RETURN # else lea 48(%edx, %eax), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif ret # endif @@ -404,9 +460,15 @@ L(matches0): bsf %eax, %eax # ifndef USE_AS_RAWMEMCHR lea -16(%eax, %edi), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif RETURN # else lea -16(%eax, %edx), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif ret # endif @@ -415,9 +477,15 @@ L(matches): bsf %eax, %eax # ifndef USE_AS_RAWMEMCHR add %edi, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif RETURN # else add %edx, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif ret # endif @@ -426,9 +494,15 @@ L(matches16): bsf %eax, %eax # ifndef USE_AS_RAWMEMCHR lea 16(%eax, %edi), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif RETURN # else lea 16(%eax, %edx), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif ret # endif @@ -437,9 +511,15 @@ L(matches32): bsf %eax, %eax # ifndef USE_AS_RAWMEMCHR lea 32(%eax, %edi), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif RETURN # else lea 32(%eax, %edx), %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif ret # endif diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S index 
2984a374cf..3ccfe66da7 100644
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -91,6 +91,15 @@ ENTRY (MEMCMP)
 	jbe L(less1bytes)
 # endif
 
+# ifdef __CHKP__
+	bndldx BLK1(%esp,%eax,1), %bnd0
+	bndldx BLK2(%esp,%edx,1), %bnd1
+	bndcl (%eax), %bnd0
+	bndcl (%edx), %bnd1
+	bndcu (%eax), %bnd0
+	bndcu (%edx), %bnd1
+# endif
+
 	pxor %xmm0, %xmm0
 	cmp $64, %ecx
 	ja L(64bytesormore)
@@ -115,6 +124,10 @@ L(less8bytes):
 	cmpb (%edx), %bl
 	jne L(nonzero)
 
+# ifdef __CHKP__
+	bndcu 1(%eax), %bnd0
+	bndcu 1(%edx), %bnd1
+# endif
 	mov 1(%eax), %bl
 	cmpb 1(%edx), %bl
 	jne L(nonzero)
@@ -122,6 +135,10 @@ L(less8bytes):
 	cmp $2, %ecx
 	jz L(0bytes)
 
+# ifdef __CHKP__
+	bndcu 2(%eax), %bnd0
+	bndcu 2(%edx), %bnd1
+# endif
 	mov 2(%eax), %bl
 	cmpb 2(%edx), %bl
 	jne L(nonzero)
@@ -129,6 +146,10 @@ L(less8bytes):
 	cmp $3, %ecx
 	jz L(0bytes)
 
+# ifdef __CHKP__
+	bndcu 3(%eax), %bnd0
+	bndcu 3(%edx), %bnd1
+# endif
 	mov 3(%eax), %bl
 	cmpb 3(%edx), %bl
 	jne L(nonzero)
@@ -136,6 +157,10 @@ L(less8bytes):
 	cmp $4, %ecx
 	jz L(0bytes)
 
+# ifdef __CHKP__
+	bndcu 4(%eax), %bnd0
+	bndcu 4(%edx), %bnd1
+# endif
 	mov 4(%eax), %bl
 	cmpb 4(%edx), %bl
 	jne L(nonzero)
@@ -143,6 +168,10 @@ L(less8bytes):
 	cmp $5, %ecx
 	jz L(0bytes)
 
+# ifdef __CHKP__
+	bndcu 5(%eax), %bnd0
+	bndcu 5(%edx), %bnd1
+# endif
 	mov 5(%eax), %bl
 	cmpb 5(%edx), %bl
 	jne L(nonzero)
@@ -150,6 +179,10 @@ L(less8bytes):
 	cmp $6, %ecx
 	jz L(0bytes)
 
+# ifdef __CHKP__
+	bndcu 6(%eax), %bnd0
+	bndcu 6(%edx), %bnd1
+# endif
 	mov 6(%eax), %bl
 	cmpb 6(%edx), %bl
 	je L(0bytes)
@@ -198,6 +231,14 @@ L(return0):
 	.p2align 4
 L(less1bytes):
 	jb L(0bytesend)
+# ifdef __CHKP__
+	bndldx BLK1(%esp,%eax,1), %bnd0
+	bndldx BLK2(%esp,%edx,1), %bnd1
+	bndcl (%eax), %bnd0
+	bndcl (%edx), %bnd1
+	bndcu (%eax), %bnd0
+	bndcu (%edx), %bnd1
+# endif
 	movzbl (%eax), %eax
 	movzbl (%edx), %edx
 	sub %edx, %eax
@@ -221,18 +262,30 @@ L(64bytesormore_loop):
 	ptest %xmm2, %xmm0
 	jnc L(find_16diff)
 
+# ifdef __CHKP__
+	bndcu 16(%eax), %bnd0
+	bndcu 16(%edx), %bnd1
+# endif
 	movdqu 16(%eax), %xmm1
 	movdqu 16(%edx), %xmm2
 	pxor %xmm1, %xmm2
 	ptest %xmm2, %xmm0
 	jnc L(find_32diff)
 
+# ifdef __CHKP__
+	bndcu 32(%eax), %bnd0
+	bndcu 32(%edx), %bnd1
+# endif
 	movdqu 32(%eax), %xmm1
 	movdqu 32(%edx), %xmm2
 	pxor %xmm1, %xmm2
 	ptest %xmm2, %xmm0
 	jnc L(find_48diff)
 
+# ifdef __CHKP__
+	bndcu 48(%eax), %bnd0
+	bndcu 48(%edx), %bnd1
+# endif
 	movdqu 48(%eax), %xmm1
 	movdqu 48(%edx), %xmm2
 	pxor %xmm1, %xmm2
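For context on the hunks above: each 16-byte step of memcmp-sse4 XORs
two unaligned loads and then uses PTEST, so the "any difference?"
branch is taken without moving data out of the SIMD registers.  The
routine tests CF via ptest against an all-zero %xmm0; testing ZF of
diff AND diff, as below, is the same predicate.  A sketch of one step
with SSE4.1 intrinsics (illustrative only):

  #include <smmintrin.h>   /* SSE4.1: _mm_testz_si128 */

  /* One 16-byte comparison step of the loop; returns nonzero when the
     blocks differ (the asm then branches to L(find_NNdiff)).  */
  static int blocks_differ_16 (const void *p, const void *q)
  {
    __m128i a = _mm_loadu_si128 ((const __m128i *) p);  /* movdqu */
    __m128i b = _mm_loadu_si128 ((const __m128i *) q);  /* movdqu */
    __m128i diff = _mm_xor_si128 (a, b);                /* pxor */
    return !_mm_testz_si128 (diff, diff);               /* ptest */
  }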
*/ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need memcpy before the initialization - happened. */ -#if defined SHARED && !defined NOT_IN_libc - .text -ENTRY(memcpy) - .type memcpy, @gnu_indirect_function - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG(bx) - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) - jne 1f - call __init_cpu_features -1: leal __memcpy_ia32@GOTOFF(%ebx), %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memcpy_ssse3@GOTOFF(%ebx), %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memcpy_ssse3_rep@GOTOFF(%ebx), %eax -2: popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - ret -END(memcpy) - -# undef ENTRY -# define ENTRY(name) \ - .type __memcpy_ia32, @function; \ - .p2align 4; \ - .globl __memcpy_ia32; \ - .hidden __memcpy_ia32; \ - __memcpy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memcpy_ia32, .-__memcpy_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memcpy_chk_ia32, @function; \ - .globl __memcpy_chk_ia32; \ - .p2align 4; \ - __memcpy_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memcpy_chk_ia32, .-__memcpy_chk_ia32 - -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memcpy; __GI_memcpy = __memcpy_ia32 -#endif - -#include "../memcpy.S" diff --git a/sysdeps/i386/i686/multiarch/memcpy.c b/sysdeps/i386/i686/multiarch/memcpy.c new file mode 100644 index 0000000000..824cdcbc28 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/memcpy.c @@ -0,0 +1,40 @@ +#include <string.h> + +void * +__memcpy (void *dst, const void *src, size_t n) +{ + const char *s = src; + char *d = dst; + void *ret = dst; + size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1); + size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1); + + if (offset_src != offset_dst) + { + while (n--) + *d++ = *s++; + } + else + { + if (offset_src) offset_src = sizeof(size_t) - offset_src; + while (n-- && offset_src--) + *d++ = *s++; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *d1++ = *s1++; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *d++ = *s++; + } + return ret; +} + +weak_alias (__memcpy, __GI_memcpy) +weak_alias (__memcpy, memcpy) diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S deleted file mode 100644 index f66ecfe16b..0000000000 --- a/sysdeps/i386/i686/multiarch/memcpy_chk.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Multiple versions of __memcpy_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version.
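Every bounds-check sequence this patch adds follows the shape seen in the memchr and memcmp hunks above: bndldx reloads the argument's bounds from the MPX bounds table (indexed by the address of the stack slot holding the pointer and by the pointer value), then bndcl traps if the pointer is below the lower bound and bndcu traps if it is above the upper bound. A minimal C model of that entry check, with hypothetical names and not part of the patch (real MPX raises a #BR exception in hardware):

    #include <stdint.h>

    struct mpx_bounds { uintptr_t lb, ub; };  /* ub = last valid byte */

    /* Stand-in for the bounds-table lookup that bndldx performs; it
       returns "infinite" bounds, as MPX does for an uninitialized entry.  */
    static struct mpx_bounds
    mpx_load_bounds (const void *p)
    {
      (void) p;
      struct mpx_bounds b = { 0, UINTPTR_MAX };
      return b;
    }

    /* What the bndcl (lower) and bndcu (upper) checks enforce.  */
    static void
    mpx_check (const void *p, struct mpx_bounds b)
    {
      uintptr_t a = (uintptr_t) p;
      if (a < b.lb || a > b.ub)
        __builtin_trap ();  /* the hardware raises #BR instead */
    }

    int main (void)
    {
      char buf[16];
      mpx_check (buf, mpx_load_bounds (buf));
      return 0;
    }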
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -/* Define multiple versions only for the definition in lib and for - DSO. There are no multiarch memcpy functions for static binaries. - */ -#ifndef NOT_IN_libc -# ifdef SHARED - .text -ENTRY(__memcpy_chk) - .type __memcpy_chk, @gnu_indirect_function - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG(bx) - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) - jne 1f - call __init_cpu_features -1: leal __memcpy_chk_ia32@GOTOFF(%ebx), %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memcpy_chk_ssse3@GOTOFF(%ebx), %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax -2: popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - ret -END(__memcpy_chk) -# else -# include "../memcpy_chk.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.c b/sysdeps/i386/i686/multiarch/memcpy_chk.c new file mode 100644 index 0000000000..1eee86c639 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/memcpy_chk.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S deleted file mode 100644 index 2d20dd9409..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove.S +++ /dev/null @@ -1,109 +0,0 @@ -/* Multiple versions of memmove - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -/* Define multiple versions only for the definition in lib. 
*/ -#ifndef NOT_IN_libc -# ifdef SHARED - .text -ENTRY(memmove) - .type memmove, @gnu_indirect_function - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG(bx) - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) - jne 1f - call __init_cpu_features -1: leal __memmove_ia32@GOTOFF(%ebx), %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memmove_ssse3@GOTOFF(%ebx), %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memmove_ssse3_rep@GOTOFF(%ebx), %eax -2: popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - ret -END(memmove) - -# undef ENTRY -# define ENTRY(name) \ - .type __memmove_ia32, @function; \ - .p2align 4; \ - .globl __memmove_ia32; \ - .hidden __memmove_ia32; \ - __memmove_ia32: cfi_startproc; \ - CALL_MCOUNT -# else - .text -ENTRY(memmove) - .type memmove, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features - jne 1f - call __init_cpu_features -1: leal __memmove_ia32, %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features - jz 2f - leal __memmove_ssse3, %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features - jz 2f - leal __memmove_ssse3_rep, %eax -2: ret -END(memmove) - -# undef ENTRY -# define ENTRY(name) \ - .type __memmove_ia32, @function; \ - .globl __memmove_ia32; \ - .p2align 4; \ - __memmove_ia32: cfi_startproc; \ - CALL_MCOUNT -# endif - -# undef END -# define END(name) \ - cfi_endproc; .size __memmove_ia32, .-__memmove_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memmove_chk_ia32, @function; \ - .globl __memmove_chk_ia32; \ - .p2align 4; \ - __memmove_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memmove_chk_ia32, .-__memmove_chk_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memmove; __GI_memmove = __memmove_ia32 -# endif -#endif - -#include "../memmove.S" diff --git a/sysdeps/i386/i686/multiarch/memmove.c b/sysdeps/i386/i686/multiarch/memmove.c new file mode 100644 index 0000000000..9e5ad6dc1a --- /dev/null +++ b/sysdeps/i386/i686/multiarch/memmove.c @@ -0,0 +1,76 @@ +#include <string.h> + +void * +__memmove (void *dst, const void *src, size_t n) +{ + const char *s = src; + char *d = dst; + void *ret = dst; + size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1); + size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1); + + if (offset_src != offset_dst) + { + if (s < d) + { + /* backward copying */ + d += n; + s += n; + while (n--) + *--d = *--s; + } + else + /* forward copying */ + while (n--) + *d++ = *s++; + } + else + { + if (s < d) + { + offset_src = ((size_t) s + n) & (sizeof(size_t) - 1); + /* backward copying */ + d += n; + s += n; + while (n-- && offset_src--) + *--d = *--s; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *--d1 = *--s1; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *--d = *--s; + } + else + { + if (offset_src) offset_src = sizeof(size_t) - offset_src; + /* forward copying */ + while (n-- && offset_src--) + *d++ = *s++; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *d1++ = *s1++; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *d++ = *s++; + } + } + return ret; +} + +weak_alias (__memmove, __GI_memmove) +weak_alias (__memmove, memmove) diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S deleted file mode 100644 index 9552640d52..0000000000 --- a/sysdeps/i386/i686/multiarch/memmove_chk.S +++ /dev/null @@ -1,102 +0,0 @@ -/* Multiple versions of __memmove_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib.
*/ -#ifndef NOT_IN_libc -# ifdef SHARED - .text -ENTRY(__memmove_chk) - .type __memmove_chk, @gnu_indirect_function - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG(bx) - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) - jne 1f - call __init_cpu_features -1: leal __memmove_chk_ia32@GOTOFF(%ebx), %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memmove_chk_ssse3@GOTOFF(%ebx), %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __memmove_chk_ssse3_rep@GOTOFF(%ebx), %eax -2: popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - ret -END(__memmove_chk) -# else - .text -ENTRY(__memmove_chk) - .type __memmove_chk, @gnu_indirect_function - cmpl $0, KIND_OFFSET+__cpu_features - jne 1f - call __init_cpu_features -1: leal __memmove_chk_ia32, %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features - jz 2f - leal __memmove_chk_ssse3, %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features - jz 2f - leal __memmove_chk_ssse3_rep, %eax -2: ret -END(__memmove_chk) - - .type __memmove_chk_ssse3, @function - .p2align 4; -__memmove_chk_ssse3: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ssse3 - cfi_endproc - .size __memmove_chk_ssse3, .-__memmove_chk_ssse3 - - .type __memmove_chk_ssse3_rep, @function - .p2align 4; -__memmove_chk_ssse3_rep: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ssse3_rep - cfi_endproc - .size __memmove_chk_ssse3_rep, .-__memmove_chk_ssse3_rep - - .type __memmove_chk_ia32, @function - .p2align 4; -__memmove_chk_ia32: - cfi_startproc - CALL_MCOUNT - movl 12(%esp), %eax - cmpl %eax, 16(%esp) - jb __chk_fail - jmp __memmove_ia32 - cfi_endproc - .size __memmove_chk_ia32, .-__memmove_chk_ia32 -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.c b/sysdeps/i386/i686/multiarch/memmove_chk.c new file mode 100644 index 0000000000..bbf53d00d3 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/memmove_chk.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S deleted file mode 100644 index 83bd1f2075..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy.S +++ /dev/null @@ -1,85 +0,0 @@ -/* Multiple versions of mempcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need mempcpy before the initialization - happened. 
*/ -#if defined SHARED && !defined NOT_IN_libc - .text -ENTRY(__mempcpy) - .type __mempcpy, @gnu_indirect_function - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG(bx) - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) - jne 1f - call __init_cpu_features -1: leal __mempcpy_ia32@GOTOFF(%ebx), %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __mempcpy_ssse3@GOTOFF(%ebx), %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __mempcpy_ssse3_rep@GOTOFF(%ebx), %eax -2: popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - ret -END(__mempcpy) - -# undef ENTRY -# define ENTRY(name) \ - .type __mempcpy_ia32, @function; \ - .p2align 4; \ - .globl __mempcpy_ia32; \ - .hidden __mempcpy_ia32; \ - __mempcpy_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __mempcpy_ia32, .-__mempcpy_ia32 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __mempcpy_chk_ia32, @function; \ - .globl __mempcpy_chk_ia32; \ - .p2align 4; \ - __mempcpy_chk_ia32: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __mempcpy_chk_ia32, .-__mempcpy_chk_ia32 - -# undef libc_hidden_def -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_def(name) \ - .globl __GI_mempcpy; __GI_mempcpy = __mempcpy_ia32 -# define libc_hidden_builtin_def(name) \ - .globl __GI___mempcpy; __GI___mempcpy = __mempcpy_ia32 -#endif - -#include "../mempcpy.S" diff --git a/sysdeps/i386/i686/multiarch/mempcpy.c b/sysdeps/i386/i686/multiarch/mempcpy.c new file mode 100644 index 0000000000..6cbdad1f83 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/mempcpy.c @@ -0,0 +1,40 @@ +#include <string.h> + +void * +mempcpy (void *dst, const void *src, size_t n) +{ + const char *s = src; + char *d = dst; + void *ret = (char *) dst + n; + size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1); + size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1); + + if (offset_src != offset_dst) + { + while (n--) + *d++ = *s++; + } + else + { + if (offset_src) offset_src = sizeof(size_t) - offset_src; + while (n-- && offset_src--) + *d++ = *s++; + n++; + if (!n) return ret; + void **d1 = (void **)d; + void **s1 = (void **)s; + while (n >= sizeof(void *)) + { + n -= sizeof(void *); + *d1++ = *s1++; + } + s = (char *)s1; + d = (char *)d1; + while (n--) + *d++ = *s++; + } + return ret; +} + +weak_alias (mempcpy, __GI_mempcpy) +weak_alias (mempcpy, __mempcpy) diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S deleted file mode 100644 index 7bd4eb1406..0000000000 --- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Multiple versions of __mempcpy_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version.
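The three C fallbacks above (memcpy.c, memmove.c, mempcpy.c) share one strategy: copy bytewise until the pointers are word-aligned, then use word-sized copies, but only when source and destination are congruent modulo the word size, since otherwise one side would always be misaligned. A quick host-side sanity check of that property; this is a hypothetical test, not part of the patch, assuming it links against the patched objects (on i386, sizeof (size_t) == 4, so the offsets 0-3 cover all residues):

    #include <assert.h>
    #include <stddef.h>
    #include <string.h>

    void *__memcpy (void *, const void *, size_t);  /* the fallback above */

    int main (void)
    {
      char src[64], dst[64];
      for (int i = 0; i < 64; i++)
        src[i] = (char) i;
      /* all 16 offset pairs; word copies are taken only when
         so % sizeof (size_t) == dof % sizeof (size_t) */
      for (int so = 0; so < 4; so++)
        for (int dof = 0; dof < 4; dof++)
          {
            memset (dst, 0, sizeof dst);
            __memcpy (dst + dof, src + so, 40);
            assert (memcmp (dst + dof, src + so, 40) == 0);
          }
      return 0;
    }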
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -/* Define multiple versions only for the definition in lib and for - DSO. There are no multiarch mempcpy functions for static binaries. - */ -#ifndef NOT_IN_libc -# ifdef SHARED - .text -ENTRY(__mempcpy_chk) - .type __mempcpy_chk, @gnu_indirect_function - pushl %ebx - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - LOAD_PIC_REG(bx) - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) - jne 1f - call __init_cpu_features -1: leal __mempcpy_chk_ia32@GOTOFF(%ebx), %eax - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __mempcpy_chk_ssse3@GOTOFF(%ebx), %eax - testl $bit_Fast_Rep_String, FEATURE_OFFSET+index_Fast_Rep_String+__cpu_features@GOTOFF(%ebx) - jz 2f - leal __mempcpy_chk_ssse3_rep@GOTOFF(%ebx), %eax -2: popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) - ret -END(__mempcpy_chk) -# else -# include "../mempcpy_chk.S" -# endif -#endif diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.c b/sysdeps/i386/i686/multiarch/mempcpy_chk.c new file mode 100644 index 0000000000..ba170784c3 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S index c5c3e97f0f..75c947c0e3 100644 --- a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S +++ b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S @@ -45,6 +45,12 @@ ENTRY (MEMCHR) movd STR2(%esp), %xmm1 mov LEN(%esp), %edx +# ifdef __CHKP__ + bndldx STR1(%esp,%ecx,1), %bnd0 + bndcl (%ecx), %bnd0 + bndcu -1(%ecx, %edx), %bnd0 +# endif + sub $16, %edx jbe L(length_less16) diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S index bcea296a9a..ce112b1f66 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S @@ -90,6 +90,7 @@ ENTRY (__memset_sse2_rep) ENTRANCE movl LEN(%esp), %ecx + #ifdef USE_AS_BZERO xor %eax, %eax #else @@ -101,6 +102,11 @@ ENTRY (__memset_sse2_rep) or %edx, %eax #endif movl DEST(%esp), %edx +#ifdef __CHKP__ + bndldx DEST(%esp,%edx,1),%bnd0 + bndcl (%edx), %bnd0 + bndcu -1(%edx, %ecx), %bnd0 +#endif cmp $32, %ecx jae L(32bytesormore) diff --git a/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S new file mode 100644 index 0000000000..b7f4e0e2fd --- /dev/null +++ b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S @@ -0,0 +1,1803 @@ +/* memcpy with SSSE3 and REP string. + Copyright (C) 2010-2013 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
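The memrchr and memset hunks above check the whole operand range up front: bndcl on the first byte and bndcu on the last byte, -1(%edx, %ecx). A scalar model of that two-ended check, with hypothetical names:

    #include <stddef.h>

    /* For a length-n operation the cheapest complete check is first
       byte against the lower bound and last byte (p + n - 1) against
       the upper bound; ub is the last valid byte.  */
    static void
    check_range (const char *p, size_t n, const char *lb, const char *ub)
    {
      if (n == 0)
        return;               /* nothing is touched */
      if (p < lb || p + (n - 1) > ub)
        __builtin_trap ();    /* what bndcl/bndcu -1(%edx,%ecx) catch */
    }

    int main (void)
    {
      char buf[32];
      check_range (buf, sizeof buf, buf, buf + sizeof buf - 1);
      return 0;
    }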
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#include "asm-syntax.h" + +#ifndef MEMCPY +# define MEMCPY mpx_memcpy_nobnd +#endif + +#ifdef USE_AS_BCOPY +# define SRC PARMS +# define DEST SRC+4 +# define LEN DEST+4 +#else +# define DEST PARMS +# define SRC DEST+4 +# define LEN SRC+4 +#endif + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) + +#ifdef SHARED +# define PARMS 8 /* Preserve EBX. */ +# define ENTRANCE PUSH (%ebx); +# define RETURN_END POP (%ebx); ret +# define RETURN RETURN_END; CFI_PUSH (%ebx) +# define JMPTBL(I, B) I - B + +/* Load an entry in a jump table into EBX and branch to it. TABLE is a + jump table with relative offsets. INDEX is a register contains the + index into the jump table. SCALE is the scale of INDEX. */ +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + /* We first load PC into EBX. */ \ + SETUP_PIC_REG(bx); \ + /* Get the address of the jump table. */ \ + addl $(TABLE - .), %ebx; \ + /* Get the entry and convert the relative offset to the \ + absolute address. */ \ + addl (%ebx,INDEX,SCALE), %ebx; \ + /* We loaded the jump table. Go. */ \ + jmp *%ebx + +# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \ + addl $(TABLE - .), %ebx + +# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ + addl (%ebx,INDEX,SCALE), %ebx; \ + /* We loaded the jump table. Go. */ \ + jmp *%ebx +#else +# define PARMS 4 +# define ENTRANCE +# define RETURN_END ret +# define RETURN RETURN_END +# define JMPTBL(I, B) I + +/* Branch to an entry in a jump table. TABLE is a jump table with + absolute offsets. INDEX is a register contains the index into the + jump table. SCALE is the scale of INDEX. */ +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ + jmp *TABLE(,INDEX,SCALE) + +# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) + +# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \ + jmp *TABLE(,INDEX,SCALE) +#endif + + .section .text.ssse3,"ax",@progbits +ENTRY (MEMCPY) + ENTRANCE + movl LEN(%esp), %ecx + movl SRC(%esp), %eax + movl DEST(%esp), %edx + +#ifdef __CHKP__ + bndldx SRC(%esp,%eax,1), %bnd1 + bndldx DEST(%esp,%edx,1), %bnd0 + bndcl (%eax), %bnd1 + bndcu -1(%eax, %ecx), %bnd1 + bndcl (%edx), %bnd0 + bndcu -1(%edx, %ecx), %bnd0 +#endif + +#ifdef USE_AS_MEMMOVE + cmp %eax, %edx + jb L(copy_forward) + je L(fwd_write_0bytes) + cmp $48, %ecx + jb L(bk_write_less48bytes) + add %ecx, %eax + cmp %eax, %edx + movl SRC(%esp), %eax + jb L(copy_backward) + +L(copy_forward): +#endif + cmp $48, %ecx + jae L(48bytesormore) + +L(fwd_write_less32bytes): +#ifndef USE_AS_MEMMOVE + cmp %dl, %al + jb L(bk_write) +#endif + add %ecx, %edx + add %ecx, %eax + BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) +#ifndef USE_AS_MEMMOVE +L(bk_write): + BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) +#endif + + ALIGN (4) +/* ECX > 32 and EDX is 4 byte aligned. 
*/ +L(48bytesormore): + movdqu (%eax), %xmm0 + PUSH (%edi) + movl %edx, %edi + and $-16, %edx + PUSH (%esi) + cfi_remember_state + add $16, %edx + movl %edi, %esi + sub %edx, %edi + add %edi, %ecx + sub %edi, %eax + +#ifdef SHARED_CACHE_SIZE_HALF + cmp $SHARED_CACHE_SIZE_HALF, %ecx +#else +# ifdef SHARED + SETUP_PIC_REG(bx) + add $_GLOBAL_OFFSET_TABLE_, %ebx + cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx +# else + cmp __x86_shared_cache_size_half, %ecx +# endif +#endif + + mov %eax, %edi + jae L(large_page) + and $0xf, %edi + jz L(shl_0) + + BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) + + ALIGN (4) +L(shl_0): + movdqu %xmm0, (%esi) + xor %edi, %edi + cmp $127, %ecx + ja L(shl_0_gobble) + lea -32(%ecx), %ecx +L(shl_0_loop): + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 + sub $32, %ecx + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi + jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 + sub $32, %ecx + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi + jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 + sub $32, %ecx + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi + jb L(shl_0_end) + + movdqa (%eax, %edi), %xmm0 + movdqa 16(%eax, %edi), %xmm1 + sub $32, %ecx + movdqa %xmm0, (%edx, %edi) + movdqa %xmm1, 16(%edx, %edi) + lea 32(%edi), %edi +L(shl_0_end): + lea 32(%ecx), %ecx + add %ecx, %edi + add %edi, %edx + add %edi, %eax + POP (%esi) + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state +L(shl_0_gobble): + +#ifdef DATA_CACHE_SIZE_HALF + cmp $DATA_CACHE_SIZE_HALF, %ecx +#else +# ifdef SHARED + SETUP_PIC_REG(bx) + add $_GLOBAL_OFFSET_TABLE_, %ebx + mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi +# else + mov __x86_data_cache_size_half, %edi +# endif +#endif + mov %edi, %esi + shr $3, %esi + sub %esi, %edi + cmp %edi, %ecx + jae L(shl_0_gobble_mem_start) + sub $128, %ecx + ALIGN (4) +L(shl_0_gobble_cache_loop): + movdqa (%eax), %xmm0 + movaps 0x10(%eax), %xmm1 + movaps 0x20(%eax), %xmm2 + movaps 0x30(%eax), %xmm3 + movaps 0x40(%eax), %xmm4 + movaps 0x50(%eax), %xmm5 + movaps 0x60(%eax), %xmm6 + movaps 0x70(%eax), %xmm7 + lea 0x80(%eax), %eax + sub $128, %ecx + movdqa %xmm0, (%edx) + movaps %xmm1, 0x10(%edx) + movaps %xmm2, 0x20(%edx) + movaps %xmm3, 0x30(%edx) + movaps %xmm4, 0x40(%edx) + movaps %xmm5, 0x50(%edx) + movaps %xmm6, 0x60(%edx) + movaps %xmm7, 0x70(%edx) + lea 0x80(%edx), %edx + + jae L(shl_0_gobble_cache_loop) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(shl_0_cache_less_64bytes) + + movdqa (%eax), %xmm0 + sub $0x40, %ecx + movdqa 0x10(%eax), %xmm1 + + movdqa %xmm0, (%edx) + movdqa %xmm1, 0x10(%edx) + + movdqa 0x20(%eax), %xmm0 + movdqa 0x30(%eax), %xmm1 + add $0x40, %eax + + movdqa %xmm0, 0x20(%edx) + movdqa %xmm1, 0x30(%edx) + add $0x40, %edx +L(shl_0_cache_less_64bytes): + cmp $0x20, %ecx + jb L(shl_0_cache_less_32bytes) + movdqa (%eax), %xmm0 + sub $0x20, %ecx + movdqa 0x10(%eax), %xmm1 + add $0x20, %eax + movdqa %xmm0, (%edx) + movdqa %xmm1, 0x10(%edx) + add $0x20, %edx +L(shl_0_cache_less_32bytes): + cmp $0x10, %ecx + jb L(shl_0_cache_less_16bytes) + sub $0x10, %ecx + movdqa (%eax), %xmm0 + add $0x10, %eax + movdqa %xmm0, (%edx) + add $0x10, %edx +L(shl_0_cache_less_16bytes): + add %ecx, %edx + add %ecx, %eax + POP (%esi) + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN 
(4) +L(shl_0_gobble_mem_start): + cmp %al, %dl + je L(copy_page_by_rep) + sub $128, %ecx +L(shl_0_gobble_mem_loop): + prefetchnta 0x1c0(%eax) + prefetchnta 0x280(%eax) + prefetchnta 0x1c0(%edx) + prefetchnta 0x280(%edx) + + movdqa (%eax), %xmm0 + movaps 0x10(%eax), %xmm1 + movaps 0x20(%eax), %xmm2 + movaps 0x30(%eax), %xmm3 + movaps 0x40(%eax), %xmm4 + movaps 0x50(%eax), %xmm5 + movaps 0x60(%eax), %xmm6 + movaps 0x70(%eax), %xmm7 + lea 0x80(%eax), %eax + sub $0x80, %ecx + movdqa %xmm0, (%edx) + movaps %xmm1, 0x10(%edx) + movaps %xmm2, 0x20(%edx) + movaps %xmm3, 0x30(%edx) + movaps %xmm4, 0x40(%edx) + movaps %xmm5, 0x50(%edx) + movaps %xmm6, 0x60(%edx) + movaps %xmm7, 0x70(%edx) + lea 0x80(%edx), %edx + + jae L(shl_0_gobble_mem_loop) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(shl_0_mem_less_64bytes) + + movdqa (%eax), %xmm0 + sub $0x40, %ecx + movdqa 0x10(%eax), %xmm1 + + movdqa %xmm0, (%edx) + movdqa %xmm1, 0x10(%edx) + + movdqa 0x20(%eax), %xmm0 + movdqa 0x30(%eax), %xmm1 + add $0x40, %eax + + movdqa %xmm0, 0x20(%edx) + movdqa %xmm1, 0x30(%edx) + add $0x40, %edx +L(shl_0_mem_less_64bytes): + cmp $0x20, %ecx + jb L(shl_0_mem_less_32bytes) + movdqa (%eax), %xmm0 + sub $0x20, %ecx + movdqa 0x10(%eax), %xmm1 + add $0x20, %eax + movdqa %xmm0, (%edx) + movdqa %xmm1, 0x10(%edx) + add $0x20, %edx +L(shl_0_mem_less_32bytes): + cmp $0x10, %ecx + jb L(shl_0_mem_less_16bytes) + sub $0x10, %ecx + movdqa (%eax), %xmm0 + add $0x10, %eax + movdqa %xmm0, (%edx) + add $0x10, %edx +L(shl_0_mem_less_16bytes): + add %ecx, %edx + add %ecx, %eax + POP (%esi) + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_1): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $1, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_1_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $1, %xmm2, %xmm3 + palignr $1, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_1_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $1, %xmm2, %xmm3 + palignr $1, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_1_loop) + +L(shl_1_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 1(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_2): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $2, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_2_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $2, %xmm2, %xmm3 + palignr $2, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_2_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $2, %xmm2, %xmm3 + palignr $2, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_2_loop) + +L(shl_2_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 2(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) 
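Each L(shl_N) entry in this part of the file handles a source that is N bytes past 16-byte alignment: it reads only aligned 16-byte blocks and stitches every output block from two neighbouring loads with palignr $N. The same idea at 4-byte granularity, as an illustrative scalar sketch; like the assembly it over-reads up to one aligned word past the source tail, which the asm gets away with because the read stays within an aligned region, but which is not portable C:

    #include <stdint.h>
    #include <string.h>

    /* Copy `words` 4-byte units to dst from a src misaligned by n
       bytes (n in 1..3; the aligned case is handled separately, as
       L(shl_0) does), touching only aligned source words.  */
    static void
    copy_shifted (uint8_t *dst, const uint8_t *src, size_t words)
    {
      size_t n = (uintptr_t) src % 4;                /* shift amount */
      const uint32_t *s = (const uint32_t *) (src - n);
      uint32_t prev = s[0];
      for (size_t i = 0; i < words; i++)
        {
          uint32_t next = s[i + 1];
          /* little-endian stitch: low bytes from prev, high from next,
             the scalar analogue of palignr $n  */
          uint32_t out = (prev >> (8 * n)) | (next << (8 * (4 - n)));
          memcpy (dst + 4 * i, &out, 4);
          prev = next;
        }
    }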
+L(shl_3): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $3, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_3_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $3, %xmm2, %xmm3 + palignr $3, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_3_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $3, %xmm2, %xmm3 + palignr $3, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_3_loop) + +L(shl_3_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 3(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_4): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $4, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_4_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $4, %xmm2, %xmm3 + palignr $4, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_4_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $4, %xmm2, %xmm3 + palignr $4, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_4_loop) + +L(shl_4_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 4(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_5): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $5, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_5_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $5, %xmm2, %xmm3 + palignr $5, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_5_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $5, %xmm2, %xmm3 + palignr $5, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_5_loop) + +L(shl_5_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 5(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_6): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $6, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_6_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $6, %xmm2, %xmm3 + palignr $6, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_6_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $6, %xmm2, %xmm3 + palignr $6, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_6_loop) + +L(shl_6_end): + add $32, %ecx + add %ecx, 
%edi + add %edi, %edx + lea 6(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_7): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $7, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_7_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $7, %xmm2, %xmm3 + palignr $7, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_7_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $7, %xmm2, %xmm3 + palignr $7, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_7_loop) + +L(shl_7_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 7(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_8): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $8, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_8_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $8, %xmm2, %xmm3 + palignr $8, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_8_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $8, %xmm2, %xmm3 + palignr $8, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_8_loop) + +L(shl_8_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 8(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_9): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $9, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_9_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $9, %xmm2, %xmm3 + palignr $9, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_9_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $9, %xmm2, %xmm3 + palignr $9, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_9_loop) + +L(shl_9_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 9(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_10): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $10, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_10_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $10, %xmm2, %xmm3 + palignr $10, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_10_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $10, %xmm2, 
%xmm3 + palignr $10, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_10_loop) + +L(shl_10_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 10(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_11): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $11, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_11_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $11, %xmm2, %xmm3 + palignr $11, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_11_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $11, %xmm2, %xmm3 + palignr $11, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_11_loop) + +L(shl_11_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 11(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_12): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $12, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_12_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $12, %xmm2, %xmm3 + palignr $12, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_12_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $12, %xmm2, %xmm3 + palignr $12, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_12_loop) + +L(shl_12_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 12(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_13): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $13, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_13_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $13, %xmm2, %xmm3 + palignr $13, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_13_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $13, %xmm2, %xmm3 + palignr $13, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_13_loop) + +L(shl_13_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 13(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_14): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $14, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_14_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $14, %xmm2, %xmm3 + palignr $14, %xmm1, %xmm2 + lea 
32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_14_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $14, %xmm2, %xmm3 + palignr $14, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_14_loop) + +L(shl_14_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 14(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(shl_15): + BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) + sub $15, %eax + movaps (%eax), %xmm1 + xor %edi, %edi + sub $32, %ecx + movdqu %xmm0, (%esi) + POP (%esi) +L(shl_15_loop): + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm4 + palignr $15, %xmm2, %xmm3 + palignr $15, %xmm1, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jb L(shl_15_end) + + movdqa 16(%eax, %edi), %xmm2 + sub $32, %ecx + movdqa 32(%eax, %edi), %xmm3 + movdqa %xmm3, %xmm1 + palignr $15, %xmm2, %xmm3 + palignr $15, %xmm4, %xmm2 + lea 32(%edi), %edi + movdqa %xmm2, -32(%edx, %edi) + movdqa %xmm3, -16(%edx, %edi) + + jae L(shl_15_loop) + +L(shl_15_end): + add $32, %ecx + add %ecx, %edi + add %edi, %edx + lea 15(%edi, %eax), %eax + POP (%edi) + BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + + + ALIGN (4) +L(fwd_write_44bytes): + movl -44(%eax), %ecx + movl %ecx, -44(%edx) +L(fwd_write_40bytes): + movl -40(%eax), %ecx + movl %ecx, -40(%edx) +L(fwd_write_36bytes): + movl -36(%eax), %ecx + movl %ecx, -36(%edx) +L(fwd_write_32bytes): + movl -32(%eax), %ecx + movl %ecx, -32(%edx) +L(fwd_write_28bytes): + movl -28(%eax), %ecx + movl %ecx, -28(%edx) +L(fwd_write_24bytes): + movl -24(%eax), %ecx + movl %ecx, -24(%edx) +L(fwd_write_20bytes): + movl -20(%eax), %ecx + movl %ecx, -20(%edx) +L(fwd_write_16bytes): + movl -16(%eax), %ecx + movl %ecx, -16(%edx) +L(fwd_write_12bytes): + movl -12(%eax), %ecx + movl %ecx, -12(%edx) +L(fwd_write_8bytes): + movl -8(%eax), %ecx + movl %ecx, -8(%edx) +L(fwd_write_4bytes): + movl -4(%eax), %ecx + movl %ecx, -4(%edx) +L(fwd_write_0bytes): +#ifndef USE_AS_BCOPY +# ifdef USE_AS_MEMPCPY + movl %edx, %eax +# else + movl DEST(%esp), %eax +# endif +#endif + RETURN + + ALIGN (4) +L(fwd_write_5bytes): + movl -5(%eax), %ecx + movl -4(%eax), %eax + movl %ecx, -5(%edx) + movl %eax, -4(%edx) +#ifndef USE_AS_BCOPY +# ifdef USE_AS_MEMPCPY + movl %edx, %eax +# else + movl DEST(%esp), %eax +# endif +#endif + RETURN + + ALIGN (4) +L(fwd_write_45bytes): + movl -45(%eax), %ecx + movl %ecx, -45(%edx) +L(fwd_write_41bytes): + movl -41(%eax), %ecx + movl %ecx, -41(%edx) +L(fwd_write_37bytes): + movl -37(%eax), %ecx + movl %ecx, -37(%edx) +L(fwd_write_33bytes): + movl -33(%eax), %ecx + movl %ecx, -33(%edx) +L(fwd_write_29bytes): + movl -29(%eax), %ecx + movl %ecx, -29(%edx) +L(fwd_write_25bytes): + movl -25(%eax), %ecx + movl %ecx, -25(%edx) +L(fwd_write_21bytes): + movl -21(%eax), %ecx + movl %ecx, -21(%edx) +L(fwd_write_17bytes): + movl -17(%eax), %ecx + movl %ecx, -17(%edx) +L(fwd_write_13bytes): + movl -13(%eax), %ecx + movl %ecx, -13(%edx) +L(fwd_write_9bytes): + movl -9(%eax), %ecx + movl %ecx, -9(%edx) + movl -5(%eax), %ecx + movl %ecx, -5(%edx) +L(fwd_write_1bytes): + movzbl -1(%eax), %ecx + movb %cl, -1(%edx) +#ifndef USE_AS_BCOPY +# ifdef USE_AS_MEMPCPY + movl %edx, %eax +# else + movl DEST(%esp), %eax +# endif 
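The L(fwd_write_Nbytes) labels above form fall-through chains indexed by the jump table: entering at the label for n copies exactly n bytes through fixed negative offsets from the already-advanced pointers, with a separate chain per residue mod 4. In C this is a switch with deliberate fall-through; a sketch for the multiple-of-4 chain only, illustrative and not part of the patch:

    #include <string.h>

    /* d and s point one past the end of the region, as in the assembly;
       n must be a multiple of 4 here (the real tables go up to 47 and
       have sibling chains for the 1-, 2- and 3-byte residues).  */
    static void
    fwd_write_tail (char *d, const char *s, unsigned n)
    {
      switch (n)
        {
        case 12: memcpy (d - 12, s - 12, 4);  /* fall through */
        case 8:  memcpy (d - 8,  s - 8,  4);  /* fall through */
        case 4:  memcpy (d - 4,  s - 4,  4);  /* fall through */
        case 0:  break;
        }
    }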
+#endif + RETURN + + ALIGN (4) +L(fwd_write_46bytes): + movl -46(%eax), %ecx + movl %ecx, -46(%edx) +L(fwd_write_42bytes): + movl -42(%eax), %ecx + movl %ecx, -42(%edx) +L(fwd_write_38bytes): + movl -38(%eax), %ecx + movl %ecx, -38(%edx) +L(fwd_write_34bytes): + movl -34(%eax), %ecx + movl %ecx, -34(%edx) +L(fwd_write_30bytes): + movl -30(%eax), %ecx + movl %ecx, -30(%edx) +L(fwd_write_26bytes): + movl -26(%eax), %ecx + movl %ecx, -26(%edx) +L(fwd_write_22bytes): + movl -22(%eax), %ecx + movl %ecx, -22(%edx) +L(fwd_write_18bytes): + movl -18(%eax), %ecx + movl %ecx, -18(%edx) +L(fwd_write_14bytes): + movl -14(%eax), %ecx + movl %ecx, -14(%edx) +L(fwd_write_10bytes): + movl -10(%eax), %ecx + movl %ecx, -10(%edx) +L(fwd_write_6bytes): + movl -6(%eax), %ecx + movl %ecx, -6(%edx) +L(fwd_write_2bytes): + movzwl -2(%eax), %ecx + movw %cx, -2(%edx) +#ifndef USE_AS_BCOPY +# ifdef USE_AS_MEMPCPY + movl %edx, %eax +# else + movl DEST(%esp), %eax +# endif +#endif + RETURN + + ALIGN (4) +L(fwd_write_47bytes): + movl -47(%eax), %ecx + movl %ecx, -47(%edx) +L(fwd_write_43bytes): + movl -43(%eax), %ecx + movl %ecx, -43(%edx) +L(fwd_write_39bytes): + movl -39(%eax), %ecx + movl %ecx, -39(%edx) +L(fwd_write_35bytes): + movl -35(%eax), %ecx + movl %ecx, -35(%edx) +L(fwd_write_31bytes): + movl -31(%eax), %ecx + movl %ecx, -31(%edx) +L(fwd_write_27bytes): + movl -27(%eax), %ecx + movl %ecx, -27(%edx) +L(fwd_write_23bytes): + movl -23(%eax), %ecx + movl %ecx, -23(%edx) +L(fwd_write_19bytes): + movl -19(%eax), %ecx + movl %ecx, -19(%edx) +L(fwd_write_15bytes): + movl -15(%eax), %ecx + movl %ecx, -15(%edx) +L(fwd_write_11bytes): + movl -11(%eax), %ecx + movl %ecx, -11(%edx) +L(fwd_write_7bytes): + movl -7(%eax), %ecx + movl %ecx, -7(%edx) +L(fwd_write_3bytes): + movzwl -3(%eax), %ecx + movzbl -1(%eax), %eax + movw %cx, -3(%edx) + movb %al, -1(%edx) +#ifndef USE_AS_BCOPY +# ifdef USE_AS_MEMPCPY + movl %edx, %eax +# else + movl DEST(%esp), %eax +# endif +#endif + RETURN_END + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(large_page): + movdqu (%eax), %xmm1 + movdqu %xmm0, (%esi) + movntdq %xmm1, (%edx) + add $0x10, %eax + add $0x10, %edx + sub $0x10, %ecx + cmp %al, %dl + je L(copy_page_by_rep) +L(large_page_loop_init): + POP (%esi) + sub $0x80, %ecx + POP (%edi) +L(large_page_loop): + prefetchnta 0x1c0(%eax) + prefetchnta 0x280(%eax) + movdqu (%eax), %xmm0 + movdqu 0x10(%eax), %xmm1 + movdqu 0x20(%eax), %xmm2 + movdqu 0x30(%eax), %xmm3 + movdqu 0x40(%eax), %xmm4 + movdqu 0x50(%eax), %xmm5 + movdqu 0x60(%eax), %xmm6 + movdqu 0x70(%eax), %xmm7 + lea 0x80(%eax), %eax + lfence + sub $0x80, %ecx + movntdq %xmm0, (%edx) + movntdq %xmm1, 0x10(%edx) + movntdq %xmm2, 0x20(%edx) + movntdq %xmm3, 0x30(%edx) + movntdq %xmm4, 0x40(%edx) + movntdq %xmm5, 0x50(%edx) + movntdq %xmm6, 0x60(%edx) + movntdq %xmm7, 0x70(%edx) + lea 0x80(%edx), %edx + jae L(large_page_loop) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(large_page_less_64bytes) + + movdqu (%eax), %xmm0 + movdqu 0x10(%eax), %xmm1 + movdqu 0x20(%eax), %xmm2 + movdqu 0x30(%eax), %xmm3 + lea 0x40(%eax), %eax + + movntdq %xmm0, (%edx) + movntdq %xmm1, 0x10(%edx) + movntdq %xmm2, 0x20(%edx) + movntdq %xmm3, 0x30(%edx) + lea 0x40(%edx), %edx + sub $0x40, %ecx +L(large_page_less_64bytes): + cmp $32, %ecx + jb L(large_page_less_32bytes) + movdqu (%eax), %xmm0 + movdqu 0x10(%eax), %xmm1 + lea 0x20(%eax), %eax + movntdq %xmm0, (%edx) + movntdq %xmm1, 0x10(%edx) + lea 0x20(%edx), %edx + sub $0x20, %ecx +L(large_page_less_32bytes): + add %ecx, %edx + add %ecx, %eax + sfence 
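When source and destination have the same low address byte (the cmp %al, %dl tests), the code prefers the L(copy_page_by_rep) path that follows: the bulk moves as n/4 dwords with one rep movsl, then the 0-3 remaining bytes are stored by hand. A C sketch of that dispatch and tail handling, illustrative only:

    #include <stddef.h>
    #include <string.h>

    static void
    copy_by_rep (char *d, const char *s, size_t n)
    {
      size_t words = n / 4, rem = n % 4;
      for (size_t i = 0; i < words; i++)   /* one `rep movsl` in the asm */
        {
          memcpy (d, s, 4);
          d += 4; s += 4;
        }
      if (rem >= 2)                        /* movzwl/movw tail */
        {
          memcpy (d, s, 2);
          d += 2; s += 2; rem -= 2;
        }
      if (rem)                             /* movzbl/movb tail */
        *d = *s;
    }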
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + + cfi_restore_state + cfi_remember_state + ALIGN (4) +L(copy_page_by_rep): + mov %eax, %esi + mov %edx, %edi + mov %ecx, %edx + shr $2, %ecx + and $3, %edx + rep movsl + jz L(copy_page_by_rep_exit) + cmp $2, %edx + jb L(copy_page_by_rep_left_1) + movzwl (%esi), %eax + movw %ax, (%edi) + add $2, %esi + add $2, %edi + sub $2, %edx + jz L(copy_page_by_rep_exit) +L(copy_page_by_rep_left_1): + movzbl (%esi), %eax + movb %al, (%edi) +L(copy_page_by_rep_exit): + POP (%esi) + POP (%edi) +#ifndef USE_AS_BCOPY + movl DEST(%esp), %eax +# ifdef USE_AS_MEMPCPY + movl LEN(%esp), %ecx + add %ecx, %eax +# endif +#endif + RETURN + + ALIGN (4) +L(bk_write_44bytes): + movl 40(%eax), %ecx + movl %ecx, 40(%edx) +L(bk_write_40bytes): + movl 36(%eax), %ecx + movl %ecx, 36(%edx) +L(bk_write_36bytes): + movl 32(%eax), %ecx + movl %ecx, 32(%edx) +L(bk_write_32bytes): + movl 28(%eax), %ecx + movl %ecx, 28(%edx) +L(bk_write_28bytes): + movl 24(%eax), %ecx + movl %ecx, 24(%edx) +L(bk_write_24bytes): + movl 20(%eax), %ecx + movl %ecx, 20(%edx) +L(bk_write_20bytes): + movl 16(%eax), %ecx + movl %ecx, 16(%edx) +L(bk_write_16bytes): + movl 12(%eax), %ecx + movl %ecx, 12(%edx) +L(bk_write_12bytes): + movl 8(%eax), %ecx + movl %ecx, 8(%edx) +L(bk_write_8bytes): + movl 4(%eax), %ecx + movl %ecx, 4(%edx) +L(bk_write_4bytes): + movl (%eax), %ecx + movl %ecx, (%edx) +L(bk_write_0bytes): +#ifndef USE_AS_BCOPY + movl DEST(%esp), %eax +# ifdef USE_AS_MEMPCPY + movl LEN(%esp), %ecx + add %ecx, %eax +# endif +#endif + RETURN + + ALIGN (4) +L(bk_write_45bytes): + movl 41(%eax), %ecx + movl %ecx, 41(%edx) +L(bk_write_41bytes): + movl 37(%eax), %ecx + movl %ecx, 37(%edx) +L(bk_write_37bytes): + movl 33(%eax), %ecx + movl %ecx, 33(%edx) +L(bk_write_33bytes): + movl 29(%eax), %ecx + movl %ecx, 29(%edx) +L(bk_write_29bytes): + movl 25(%eax), %ecx + movl %ecx, 25(%edx) +L(bk_write_25bytes): + movl 21(%eax), %ecx + movl %ecx, 21(%edx) +L(bk_write_21bytes): + movl 17(%eax), %ecx + movl %ecx, 17(%edx) +L(bk_write_17bytes): + movl 13(%eax), %ecx + movl %ecx, 13(%edx) +L(bk_write_13bytes): + movl 9(%eax), %ecx + movl %ecx, 9(%edx) +L(bk_write_9bytes): + movl 5(%eax), %ecx + movl %ecx, 5(%edx) +L(bk_write_5bytes): + movl 1(%eax), %ecx + movl %ecx, 1(%edx) +L(bk_write_1bytes): + movzbl (%eax), %ecx + movb %cl, (%edx) +#ifndef USE_AS_BCOPY + movl DEST(%esp), %eax +# ifdef USE_AS_MEMPCPY + movl LEN(%esp), %ecx + add %ecx, %eax +# endif +#endif + RETURN + + ALIGN (4) +L(bk_write_46bytes): + movl 42(%eax), %ecx + movl %ecx, 42(%edx) +L(bk_write_42bytes): + movl 38(%eax), %ecx + movl %ecx, 38(%edx) +L(bk_write_38bytes): + movl 34(%eax), %ecx + movl %ecx, 34(%edx) +L(bk_write_34bytes): + movl 30(%eax), %ecx + movl %ecx, 30(%edx) +L(bk_write_30bytes): + movl 26(%eax), %ecx + movl %ecx, 26(%edx) +L(bk_write_26bytes): + movl 22(%eax), %ecx + movl %ecx, 22(%edx) +L(bk_write_22bytes): + movl 18(%eax), %ecx + movl %ecx, 18(%edx) +L(bk_write_18bytes): + movl 14(%eax), %ecx + movl %ecx, 14(%edx) +L(bk_write_14bytes): + movl 10(%eax), %ecx + movl %ecx, 10(%edx) +L(bk_write_10bytes): + movl 6(%eax), %ecx + movl %ecx, 6(%edx) +L(bk_write_6bytes): + movl 2(%eax), %ecx + movl %ecx, 2(%edx) +L(bk_write_2bytes): + movzwl (%eax), %ecx + movw %cx, (%edx) +#ifndef USE_AS_BCOPY + movl DEST(%esp), %eax +# ifdef USE_AS_MEMPCPY + movl LEN(%esp), %ecx + add %ecx, %eax +# endif +#endif + RETURN + + ALIGN (4) +L(bk_write_47bytes): + movl 43(%eax), %ecx + movl %ecx, 43(%edx) +L(bk_write_43bytes): + movl 39(%eax), %ecx + 
movl %ecx, 39(%edx) +L(bk_write_39bytes): + movl 35(%eax), %ecx + movl %ecx, 35(%edx) +L(bk_write_35bytes): + movl 31(%eax), %ecx + movl %ecx, 31(%edx) +L(bk_write_31bytes): + movl 27(%eax), %ecx + movl %ecx, 27(%edx) +L(bk_write_27bytes): + movl 23(%eax), %ecx + movl %ecx, 23(%edx) +L(bk_write_23bytes): + movl 19(%eax), %ecx + movl %ecx, 19(%edx) +L(bk_write_19bytes): + movl 15(%eax), %ecx + movl %ecx, 15(%edx) +L(bk_write_15bytes): + movl 11(%eax), %ecx + movl %ecx, 11(%edx) +L(bk_write_11bytes): + movl 7(%eax), %ecx + movl %ecx, 7(%edx) +L(bk_write_7bytes): + movl 3(%eax), %ecx + movl %ecx, 3(%edx) +L(bk_write_3bytes): + movzwl 1(%eax), %ecx + movw %cx, 1(%edx) + movzbl (%eax), %eax + movb %al, (%edx) +#ifndef USE_AS_BCOPY + movl DEST(%esp), %eax +# ifdef USE_AS_MEMPCPY + movl LEN(%esp), %ecx + add %ecx, %eax +# endif +#endif + RETURN_END + + + .pushsection .rodata.ssse3,"a",@progbits + ALIGN (2) +L(table_48bytes_fwd): + .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd)) + .int JMPTBL 
(L(fwd_write_44bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd)) + .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd)) + + ALIGN (2) +L(shl_table): + .int JMPTBL (L(shl_0), L(shl_table)) + .int JMPTBL (L(shl_1), L(shl_table)) + .int JMPTBL (L(shl_2), L(shl_table)) + .int JMPTBL (L(shl_3), L(shl_table)) + .int JMPTBL (L(shl_4), L(shl_table)) + .int JMPTBL (L(shl_5), L(shl_table)) + .int JMPTBL (L(shl_6), L(shl_table)) + .int JMPTBL (L(shl_7), L(shl_table)) + .int JMPTBL (L(shl_8), L(shl_table)) + .int JMPTBL (L(shl_9), L(shl_table)) + .int JMPTBL (L(shl_10), L(shl_table)) + .int JMPTBL (L(shl_11), L(shl_table)) + .int JMPTBL (L(shl_12), L(shl_table)) + .int JMPTBL (L(shl_13), L(shl_table)) + .int JMPTBL (L(shl_14), L(shl_table)) + .int JMPTBL (L(shl_15), L(shl_table)) + + ALIGN (2) +L(table_48_bytes_bwd): + .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd)) + .int JMPTBL (L(bk_write_44bytes), 
L(table_48_bytes_bwd))
+	.int	JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
+	.int	JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
+	.int	JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
+
+	.popsection
+
+#ifdef USE_AS_MEMMOVE
+	ALIGN (4)
+L(copy_backward):
+	PUSH (%esi)
+	movl	%eax, %esi
+	add	%ecx, %edx
+	add	%ecx, %esi
+	testl	$0x3, %edx
+	jnz	L(bk_align)
+
+L(bk_aligned_4):
+	cmp	$64, %ecx
+	jae	L(bk_write_more64bytes)
+
+L(bk_write_64bytesless):
+	cmp	$32, %ecx
+	jb	L(bk_write_less32bytes)
+
+L(bk_write_more32bytes):
+	/* Copy 32 bytes at a time.  */
+	sub	$32, %ecx
+	movl	-4(%esi), %eax
+	movl	%eax, -4(%edx)
+	movl	-8(%esi), %eax
+	movl	%eax, -8(%edx)
+	movl	-12(%esi), %eax
+	movl	%eax, -12(%edx)
+	movl	-16(%esi), %eax
+	movl	%eax, -16(%edx)
+	movl	-20(%esi), %eax
+	movl	%eax, -20(%edx)
+	movl	-24(%esi), %eax
+	movl	%eax, -24(%edx)
+	movl	-28(%esi), %eax
+	movl	%eax, -28(%edx)
+	movl	-32(%esi), %eax
+	movl	%eax, -32(%edx)
+	sub	$32, %edx
+	sub	$32, %esi
+
+L(bk_write_less32bytes):
+	movl	%esi, %eax
+	sub	%ecx, %edx
+	sub	%ecx, %eax
+	POP (%esi)
+L(bk_write_less48bytes):
+	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+
+	CFI_PUSH (%esi)
+	ALIGN (4)
+L(bk_align):
+	cmp	$8, %ecx
+	jbe	L(bk_write_less32bytes)
+	testl	$1, %edx
+	/* We get here only if (EDX & 3) != 0, so if (EDX & 1) == 0,
+	   then (EDX & 2) must be != 0.  */
+	jz	L(bk_got2)
+	sub	$1, %esi
+	sub	$1, %ecx
+	sub	$1, %edx
+	movzbl	(%esi), %eax
+	movb	%al, (%edx)
+
+	testl	$2, %edx
+	jz	L(bk_aligned_4)
+
+L(bk_got2):
+	sub	$2, %esi
+	sub	$2, %ecx
+	sub	$2, %edx
+	movzwl	(%esi), %eax
+	movw	%ax, (%edx)
+	jmp	L(bk_aligned_4)
+
+	ALIGN (4)
+L(bk_write_more64bytes):
+	/* Check alignment of last byte.  */
+	testl	$15, %edx
+	jz	L(bk_ssse3_cpy_pre)
+
+/* EDX is 4-byte aligned but not 16-byte aligned.
*/ +L(bk_ssse3_align): + sub $4, %esi + sub $4, %ecx + sub $4, %edx + movl (%esi), %eax + movl %eax, (%edx) + + testl $15, %edx + jz L(bk_ssse3_cpy_pre) + + sub $4, %esi + sub $4, %ecx + sub $4, %edx + movl (%esi), %eax + movl %eax, (%edx) + + testl $15, %edx + jz L(bk_ssse3_cpy_pre) + + sub $4, %esi + sub $4, %ecx + sub $4, %edx + movl (%esi), %eax + movl %eax, (%edx) + +L(bk_ssse3_cpy_pre): + cmp $64, %ecx + jb L(bk_write_more32bytes) + +L(bk_ssse3_cpy): + sub $64, %esi + sub $64, %ecx + sub $64, %edx + movdqu 0x30(%esi), %xmm3 + movdqa %xmm3, 0x30(%edx) + movdqu 0x20(%esi), %xmm2 + movdqa %xmm2, 0x20(%edx) + movdqu 0x10(%esi), %xmm1 + movdqa %xmm1, 0x10(%edx) + movdqu (%esi), %xmm0 + movdqa %xmm0, (%edx) + cmp $64, %ecx + jae L(bk_ssse3_cpy) + jmp L(bk_write_64bytesless) + +#endif + +END (MEMCPY) diff --git a/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S new file mode 100644 index 0000000000..caaa89aea8 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S @@ -0,0 +1,3 @@ +#define USE_AS_MEMMOVE +#define MEMCPY mpx_memmove_nobnd +#include "mpx_memcpy_nobnd.S" diff --git a/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S new file mode 100644 index 0000000000..4b0af499e4 --- /dev/null +++ b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S @@ -0,0 +1,3 @@ +#define USE_AS_MEMPCPY +#define MEMCPY mpx_mempcpy_nobnd +#include "mpx_memcpy_nobnd.S" diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S index 62d60cdb78..b1d39ae5cb 100644 --- a/sysdeps/i386/i686/multiarch/strcat-sse2.S +++ b/sysdeps/i386/i686/multiarch/strcat-sse2.S @@ -94,11 +94,21 @@ ENTRY (STRCAT) movl LEN(%esp), %ebx test %ebx, %ebx jz L(ExitZero) +# endif +# ifdef __CHKP__ + bndldx STR1(%esp,%eax,1), %bnd0 + bndldx STR2(%esp,%esi,1), %bnd1 + bndcl (%esi), %bnd1 + bndcu (%esi), %bnd1 # endif cmpb $0, (%esi) mov %esi, %ecx mov %eax, %edx jz L(ExitZero) +# ifdef __CHKP__ + bndcl (%eax), %bnd0 + bndcu (%eax), %bnd0 +# endif and $63, %ecx and $63, %edx @@ -113,6 +123,9 @@ ENTRY (STRCAT) movdqu (%eax), %xmm1 movdqu (%esi), %xmm5 pcmpeqb %xmm1, %xmm0 +# ifdef __CHKP__ + bndcu 16(%esi), %bnd1 +# endif movdqu 16(%esi), %xmm6 pmovmskb %xmm0, %ecx pcmpeqb %xmm5, %xmm4 @@ -132,6 +145,9 @@ L(alignment_prolog): and $-16, %eax pcmpeqb (%eax), %xmm0 movdqu (%esi), %xmm5 +# ifdef __CHKP__ + bndcu 16(%esi), %bnd1 +# endif movdqu 16(%esi), %xmm6 pmovmskb %xmm0, %edx pcmpeqb %xmm5, %xmm4 @@ -148,21 +164,33 @@ L(loop_prolog): pxor %xmm3, %xmm3 .p2align 4 L(align16_loop): +# ifdef __CHKP__ + bndcu 16(%eax), %bnd0 +# endif pcmpeqb 16(%eax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +# ifdef __CHKP__ + bndcu 32(%eax), %bnd0 +# endif pcmpeqb 32(%eax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +# ifdef __CHKP__ + bndcu 48(%eax), %bnd0 +# endif pcmpeqb 48(%eax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +# ifdef __CHKP__ + bndcu 64(%eax), %bnd0 +# endif pcmpeqb 64(%eax), %xmm3 pmovmskb %xmm3, %edx lea 64(%eax), %eax @@ -212,6 +240,9 @@ L(StartStrcpyPart): test %edx, %edx jnz L(CopyFrom1To16BytesTail1) +# ifdef __CHKP__ + bndcu 15(%eax), %bnd0 +# endif movdqu %xmm5, (%eax) pmovmskb %xmm7, %edx # ifdef USE_AS_STRNCAT @@ -250,21 +281,33 @@ L(StrlenCore7_1): .p2align 4 L(align16_loop_1): +# ifdef __CHKP__ + bndcu 16(%eax), %bnd0 +# endif pcmpeqb 16(%eax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16_1) +# ifdef __CHKP__ + bndcu 32(%eax), %bnd0 
+# endif pcmpeqb 32(%eax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32_1) +# ifdef __CHKP__ + bndcu 48(%eax), %bnd0 +# endif pcmpeqb 48(%eax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48_1) +# ifdef __CHKP__ + bndcu 64(%eax), %bnd0 +# endif pcmpeqb 64(%eax), %xmm3 pmovmskb %xmm3, %edx lea 64(%eax), %eax @@ -323,6 +366,9 @@ L(StartStrcpyPart_1): test %edx, %edx jnz L(CopyFrom1To16BytesTail) +# ifdef __CHKP__ + bndcu 16(%esi), %bnd1 +# endif pcmpeqb 16(%esi), %xmm0 pmovmskb %xmm0, %edx # ifdef USE_AS_STRNCAT @@ -341,6 +387,9 @@ L(Unalign16Both): mov $16, %ecx movdqa (%esi, %ecx), %xmm1 movaps 16(%esi, %ecx), %xmm2 +# ifdef __CHKP__ + bndcu 15(%eax, %ecx), %bnd0 +# endif movdqu %xmm1, (%eax, %ecx) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %edx @@ -352,6 +401,10 @@ L(Unalign16Both): test %edx, %edx jnz L(CopyFrom1To16Bytes) L(Unalign16BothBigN): +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%eax, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm3 movdqu %xmm2, (%eax, %ecx) pcmpeqb %xmm3, %xmm0 @@ -364,6 +417,10 @@ L(Unalign16BothBigN): test %edx, %edx jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%eax, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm4 movdqu %xmm3, (%eax, %ecx) pcmpeqb %xmm4, %xmm0 @@ -376,6 +433,10 @@ L(Unalign16BothBigN): test %edx, %edx jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%eax, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm1 movdqu %xmm4, (%eax, %ecx) pcmpeqb %xmm1, %xmm0 @@ -388,6 +449,10 @@ L(Unalign16BothBigN): test %edx, %edx jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%eax, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm2 movdqu %xmm1, (%eax, %ecx) pcmpeqb %xmm2, %xmm0 @@ -400,6 +465,10 @@ L(Unalign16BothBigN): test %edx, %edx jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%eax, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm3 movdqu %xmm2, (%eax, %ecx) pcmpeqb %xmm3, %xmm0 @@ -412,6 +481,9 @@ L(Unalign16BothBigN): test %edx, %edx jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%eax, %ecx), %bnd0 +# endif movdqu %xmm3, (%eax, %ecx) mov %esi, %edx lea 16(%esi, %ecx), %esi @@ -420,6 +492,9 @@ L(Unalign16BothBigN): sub %edx, %eax # ifdef USE_AS_STRNCAT lea 128(%ebx, %edx), %ebx +# endif +# ifdef __CHKP__ + bndcu (%esi), %bnd1 # endif movaps (%esi), %xmm2 movaps %xmm2, %xmm4 @@ -443,6 +518,10 @@ L(Unalign16BothBigN): L(Unaligned64Loop_start): add $64, %eax add $64, %esi +# ifdef __CHKP__ + bndcu (%esi), %bnd1 + bndcu -1(%eax), %bnd0 +# endif movdqu %xmm4, -64(%eax) movaps (%esi), %xmm2 movdqa %xmm2, %xmm4 @@ -485,11 +564,18 @@ L(Unaligned64Leave): jnz L(CopyFrom1To16BytesUnaligned_32) bsf %ecx, %edx +# ifdef __CHKP__ + bndcu 47(%eax), %bnd0 +# endif movdqu %xmm4, (%eax) movdqu %xmm5, 16(%eax) movdqu %xmm6, 32(%eax) add $48, %esi add $48, %eax +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) # ifdef USE_AS_STRNCAT @@ -501,12 +587,18 @@ L(BigN): test %edx, %edx jnz L(CopyFrom1To16BytesTail) +# ifdef __CHKP__ + bndcu 16(%esi), %bnd1 +# endif pcmpeqb 16(%esi), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx jnz L(CopyFrom1To32Bytes) movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ +# ifdef __CHKP__ + bndcu 15(%eax), %bnd0 +# endif movdqu %xmm1, (%eax) sub %ecx, %eax sub $48, %ebx @@ -515,6 +607,9 @@ L(BigN): mov $16, %ecx movdqa (%esi, %ecx), %xmm1 movaps 16(%esi, %ecx), %xmm2 +# ifdef __CHKP__ 
+ bndcu 15(%eax, %ecx), %bnd0 +# endif movdqu %xmm1, (%eax, %ecx) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %edx @@ -532,12 +627,20 @@ L(CopyFrom1To16Bytes): add %ecx, %eax add %ecx, %esi bsf %edx, %edx +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 L(CopyFrom1To16BytesTail): add %ecx, %esi bsf %edx, %edx +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 @@ -546,6 +649,10 @@ L(CopyFrom1To32Bytes1): add $16, %eax L(CopyFrom1To16BytesTail1): bsf %edx, %edx +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 @@ -554,34 +661,60 @@ L(CopyFrom1To32Bytes): add %ecx, %esi add $16, %edx sub %ecx, %edx +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 L(CopyFrom1To16BytesUnaligned_0): bsf %edx, %edx +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 L(CopyFrom1To16BytesUnaligned_16): bsf %ecx, %edx +# ifdef __CHKP__ + bndcu 15(%eax), %bnd0 +# endif movdqu %xmm4, (%eax) add $16, %esi add $16, %eax +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 L(CopyFrom1To16BytesUnaligned_32): bsf %edx, %edx +# ifdef __CHKP__ + bndcu 31(%eax), %bnd0 +# endif movdqu %xmm4, (%eax) movdqu %xmm5, 16(%eax) add $32, %esi add $32, %eax +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) # ifdef USE_AS_STRNCAT .p2align 4 L(CopyFrom1To16BytesExit): +# ifdef __CHKP__ + bndcu (%eax, %edx), %bnd0 + bndcu (%esi, %edx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) /* Case2 */ @@ -594,6 +727,10 @@ L(CopyFrom1To16BytesCase2): bsf %edx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) .p2align 4 @@ -605,6 +742,10 @@ L(CopyFrom1To32BytesCase2): sub %ecx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) L(CopyFrom1To16BytesTailCase2): @@ -613,12 +754,20 @@ L(CopyFrom1To16BytesTailCase2): bsf %edx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) L(CopyFrom1To16BytesTail1Case2): bsf %edx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) /* Case2 or Case3, Case3 */ @@ -631,6 +780,10 @@ L(CopyFrom1To16BytesCase3): add $16, %ebx add %ecx, %eax add %ecx, %esi +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) .p2align 4 @@ -639,6 +792,10 @@ L(CopyFrom1To32BytesCase2OrCase3): jnz L(CopyFrom1To32BytesCase2) sub %ecx, %ebx add %ecx, %esi +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) .p2align 4 
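/* Editor's annotation, not part of the patch: each Case2/Case3
   dispatch in these strncat hunks is guarded by the same pair of
   checks before the jump through L(ExitStrncatTable).  A minimal
   sketch of the shared pattern, with %eax/%bnd0 the destination,
   %esi/%bnd1 the source, and %ebx the remaining byte count (the
   exact offset bookkeeping depends on how each path has already
   adjusted %ebx):

	# ifdef __CHKP__
	bndcu	1(%eax, %ebx), %bnd0	# last dest byte the exit stub may store
	bndcu	1(%esi, %ebx), %bnd1	# last source byte it may read
	# endif
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)

   so an undersized buffer faults at this single check site rather
   than inside whichever exit stub the jump table selects.  */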
@@ -647,6 +804,10 @@ L(CopyFrom1To16BytesTailCase2OrCase3): jnz L(CopyFrom1To16BytesTailCase2) sub %ecx, %ebx add %ecx, %esi +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) .p2align 4 @@ -657,6 +818,10 @@ L(CopyFrom1To32Bytes1Case2OrCase3): L(CopyFrom1To16BytesTail1Case2OrCase3): test %edx, %edx jnz L(CopyFrom1To16BytesTail1Case2) +# ifdef __CHKP__ + bndcu 1(%eax, %ebx), %bnd0 + bndcu 1(%esi, %ebx), %bnd1 +# endif BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4) # endif @@ -1110,15 +1275,27 @@ L(Unaligned64LeaveCase3): and $-16, %ecx add $48, %ebx jl L(CopyFrom1To16BytesCase3) +# ifdef __CHKP__ + bndcu 15(%eax), %bnd0 +# endif movdqu %xmm4, (%eax) sub $16, %ebx jb L(CopyFrom1To16BytesCase3) +# ifdef __CHKP__ + bndcu 31(%eax), %bnd0 +# endif movdqu %xmm5, 16(%eax) sub $16, %ebx jb L(CopyFrom1To16BytesCase3) +# ifdef __CHKP__ + bndcu 47(%eax), %bnd0 +# endif movdqu %xmm6, 32(%eax) sub $16, %ebx jb L(CopyFrom1To16BytesCase3) +# ifdef __CHKP__ + bndcu 63(%eax), %bnd0 +# endif movdqu %xmm7, 48(%eax) xor %bh, %bh movb %bh, 64(%eax) @@ -1137,6 +1314,9 @@ L(Unaligned64LeaveCase2): pcmpeqb %xmm5, %xmm0 pmovmskb %xmm0, %edx +# ifdef __CHKP__ + bndcu 15(%eax), %bnd0 +# endif movdqu %xmm4, (%eax) add $16, %ecx sub $16, %ebx @@ -1146,6 +1326,9 @@ L(Unaligned64LeaveCase2): pcmpeqb %xmm6, %xmm0 pmovmskb %xmm0, %edx +# ifdef __CHKP__ + bndcu 31(%eax), %bnd0 +# endif movdqu %xmm5, 16(%eax) add $16, %ecx sub $16, %ebx @@ -1155,6 +1338,9 @@ L(Unaligned64LeaveCase2): pcmpeqb %xmm7, %xmm0 pmovmskb %xmm0, %edx +# ifdef __CHKP__ + bndcu 47(%eax), %bnd0 +# endif movdqu %xmm6, 32(%eax) lea 16(%eax, %ecx), %eax lea 16(%esi, %ecx), %esi diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S index 938d74d318..1e59581f3e 100644 --- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S +++ b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S @@ -46,6 +46,12 @@ ENTRY (__strchr_sse2_bsf) mov STR1(%esp), %ecx movd STR2(%esp), %xmm1 +# ifdef __CHKP__ + bndldx STR1(%esp,%ecx,1), %bnd0 + bndcl (%ecx), %bnd0 + bndcu (%ecx), %bnd0 +# endif + pxor %xmm2, %xmm2 mov %ecx, %edi punpcklbw %xmm1, %xmm1 @@ -81,6 +87,9 @@ ENTRY (__strchr_sse2_bsf) L(unaligned_match): add %edi, %eax add %ecx, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif RETURN .p2align 4 @@ -94,6 +103,9 @@ L(unaligned_no_match): .p2align 4 /* Loop start on aligned string. */ L(loop): +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -103,6 +115,9 @@ L(loop): or %eax, %edx jnz L(matches) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -112,6 +127,9 @@ L(loop): or %eax, %edx jnz L(matches) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -121,6 +139,9 @@ L(loop): or %eax, %edx jnz L(matches) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -146,6 +167,9 @@ L(matches): L(match): sub $16, %edi add %edi, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif RETURN /* Return NULL. 
*/ diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S index 355ed4e674..1958b36360 100644 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S @@ -221,6 +221,12 @@ L(ascii): movl CNT(%esp), REM test REM, REM je L(eq) +#endif +#ifdef __CHKP__ + bndldx STR1(%esp,%edx,1), %bnd0 + bndldx STR2(%esp,%eax,1), %bnd1 + bndcl (%edx), %bnd0 + bndcl (%eax), %bnd1 #endif mov %dx, %cx and $0xfff, %cx @@ -280,6 +286,10 @@ L(ascii): add $16, %edx add $16, %eax L(first4bytes): +#ifdef __CHKP__ + bndcu (%edx), %bnd0 + bndcu (%eax), %bnd1 +#endif movzbl (%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L movzbl (%edx), %edi @@ -303,6 +313,10 @@ L(first4bytes): je L(eq) #endif +#ifdef __CHKP__ + bndcu 1(%edx), %bnd0 + bndcu 1(%eax), %bnd1 +#endif movzbl 1(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L movzbl 1(%edx), %edi @@ -324,6 +338,10 @@ L(first4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $2, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 2(%edx), %bnd0 + bndcu 2(%eax), %bnd1 #endif movzbl 2(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -346,6 +364,10 @@ L(first4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $3, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 3(%edx), %bnd0 + bndcu 3(%eax), %bnd1 #endif movzbl 3(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -368,6 +390,10 @@ L(first4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $4, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 4(%edx), %bnd0 + bndcu 4(%eax), %bnd1 #endif movzbl 4(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -390,6 +416,10 @@ L(first4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $5, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 5(%edx), %bnd0 + bndcu 5(%eax), %bnd1 #endif movzbl 5(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -412,6 +442,10 @@ L(first4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $6, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 6(%edx), %bnd0 + bndcu 6(%eax), %bnd1 #endif movzbl 6(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -434,6 +468,10 @@ L(first4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $7, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 7(%edx), %bnd0 + bndcu 7(%eax), %bnd1 #endif movzbl 7(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -483,6 +521,10 @@ L(check_offset): testl %edx, %edx jg L(crosspage) L(loop): +#ifdef __CHKP__ + bndcu (%edi,%edx), %bnd0 + bndcu (%esi,%edx), %bnd1 +#endif movdqu (%esi,%edx), %xmm2 movdqu (%edi,%edx), %xmm1 TOLOWER (%xmm2, %xmm1) @@ -497,6 +539,10 @@ L(loop): add $16, %edx jle L(loop) L(crosspage): +#ifdef __CHKP__ + bndcu (%edi,%edx), %bnd0 + bndcu (%esi,%edx), %bnd1 +#endif movzbl (%edi,%edx), %eax movzbl (%esi,%edx), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -625,6 +671,10 @@ L(less16bytes): add $8, %eax L(less4bytes): +#ifdef __CHKP__ + bndcu (%edx), %bnd0 + bndcu (%eax), %bnd1 +#endif movzbl (%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L movzbl (%edx), %edi @@ -646,6 +696,10 @@ L(less4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $1, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 1(%edx), %bnd0 + bndcu 1(%eax), 
%bnd1 #endif movzbl 1(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -670,6 +724,10 @@ L(less4bytes): je L(eq) #endif +#ifdef __CHKP__ + bndcu 2(%edx), %bnd0 + bndcu 2(%eax), %bnd1 +#endif movzbl 2(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L movzbl 2(%edx), %edi @@ -691,6 +749,10 @@ L(less4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $3, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 3(%edx), %bnd0 + bndcu 3(%eax), %bnd1 #endif movzbl 3(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -714,6 +776,10 @@ L(more4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $4, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 4(%edx), %bnd0 + bndcu 4(%eax), %bnd1 #endif movzbl 4(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -737,6 +803,10 @@ L(more4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $5, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 5(%edx), %bnd0 + bndcu 5(%eax), %bnd1 #endif movzbl 5(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -759,6 +829,10 @@ L(more4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $6, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 6(%edx), %bnd0 + bndcu 6(%eax), %bnd1 #endif movzbl 6(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L @@ -781,6 +855,10 @@ L(more4bytes): #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L cmp $7, REM je L(eq) +#endif +#ifdef __CHKP__ + bndcu 7(%edx), %bnd0 + bndcu 7(%eax), %bnd1 #endif movzbl 7(%eax), %ecx #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S index d942ac2852..4fdf7e0de1 100644 --- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S +++ b/sysdeps/i386/i686/multiarch/strcpy-sse2.S @@ -85,6 +85,14 @@ ENTRY (STRCPY) movl LEN(%esp), %ebx test %ebx, %ebx jz L(ExitZero) +# ifdef __CHKP__ + bndldx STR1(%esp,%edi,1), %bnd0 + bndldx STR2(%esp,%esi,1), %bnd1 + bndcl (%esi), %bnd1 + bndcu (%esi), %bnd1 + bndcl (%edi), %bnd0 + bndcu -1(%edi, %ebx), %bnd0 +# endif mov %esi, %ecx # ifndef USE_AS_STPCPY @@ -111,6 +119,9 @@ ENTRY (STRCPY) test %edx, %edx jnz L(CopyFrom1To16BytesTail) +# ifdef __CHKP__ + bndcu 16(%esi), %bnd1 +# endif pcmpeqb 16(%esi), %xmm0 pmovmskb %xmm0, %edx # ifdef USE_AS_STPCPY @@ -124,6 +135,9 @@ ENTRY (STRCPY) jnz L(CopyFrom1To32Bytes) movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ +# ifdef __CHKP__ + bndcu 15(%edi), %bnd0 +# endif movdqu %xmm1, (%edi) sub %ecx, %edi @@ -132,6 +146,10 @@ ENTRY (STRCPY) .p2align 4 L(Unalign16Both): mov $16, %ecx +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%edi, %ecx), %bnd0 +# endif movdqa (%esi, %ecx), %xmm1 movaps 16(%esi, %ecx), %xmm2 movdqu %xmm1, (%edi, %ecx) @@ -143,6 +161,10 @@ L(Unalign16Both): test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm2) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%edi, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm3 movdqu %xmm2, (%edi, %ecx) pcmpeqb %xmm3, %xmm0 @@ -153,6 +175,10 @@ L(Unalign16Both): test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm3) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%edi, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm4 movdqu %xmm3, (%edi, %ecx) pcmpeqb %xmm4, %xmm0 @@ -163,6 +189,10 @@ L(Unalign16Both): test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm4) +# ifdef __CHKP__ + bndcu 
16(%esi, %ecx), %bnd1 + bndcu 15(%edi, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm1 movdqu %xmm4, (%edi, %ecx) pcmpeqb %xmm1, %xmm0 @@ -173,6 +203,10 @@ L(Unalign16Both): test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm1) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%edi, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm2 movdqu %xmm1, (%edi, %ecx) pcmpeqb %xmm2, %xmm0 @@ -183,6 +217,10 @@ L(Unalign16Both): test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm2) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%edi, %ecx), %bnd0 +# endif movaps 16(%esi, %ecx), %xmm3 movdqu %xmm2, (%edi, %ecx) pcmpeqb %xmm3, %xmm0 @@ -193,6 +231,10 @@ L(Unalign16Both): test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm3) +# ifdef __CHKP__ + bndcu 16(%esi, %ecx), %bnd1 + bndcu 15(%edi, %ecx), %bnd0 +# endif movdqu %xmm3, (%edi, %ecx) mov %esi, %edx lea 16(%esi, %ecx), %esi @@ -202,6 +244,9 @@ L(Unalign16Both): lea 128(%ebx, %edx), %ebx L(Unaligned64Loop): +# ifdef __CHKP__ + bndcu 48(%esi), %bnd1 +# endif movaps (%esi), %xmm2 movaps %xmm2, %xmm4 movaps 16(%esi), %xmm5 @@ -220,6 +265,10 @@ L(Unaligned64Loop): L(Unaligned64Loop_start): add $64, %edi add $64, %esi +# ifdef __CHKP__ + bndcu (%esi), %bnd1 + bndcu (%edi), %bnd0 +# endif movdqu %xmm4, -64(%edi) movaps (%esi), %xmm2 movdqa %xmm2, %xmm4 @@ -259,15 +308,27 @@ L(Unaligned64Leave): jnz L(CopyFrom1To16BytesUnaligned_32) bsf %ecx, %edx +# ifdef __CHKP__ + bndcu 47(%edi), %bnd0 +# endif movdqu %xmm4, (%edi) movdqu %xmm5, 16(%edi) movdqu %xmm6, 32(%edi) # ifdef USE_AS_STPCPY +# ifdef __CHKP__ + bndcu 48(%edi, %edx), %bnd0 +# endif lea 48(%edi, %edx), %eax +# endif +# ifdef __CHKP__ + bndcu 63(%edi), %bnd0 # endif movdqu %xmm7, 48(%edi) add $15, %ebx sub %edx, %ebx +# ifdef __CHKP__ + bndcu 49(%edi, %edx), %bnd0 +# endif lea 49(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) @@ -288,6 +349,10 @@ L(SourceStringAlignmentZero): test %edx, %edx jnz L(CopyFrom1To16BytesTail1) +# ifdef __CHKP__ + bndcu 15(%edi), %bnd0 + bndcu 16(%esi), %bnd1 +# endif pcmpeqb 16(%esi), %xmm0 movdqu %xmm1, (%edi) pmovmskb %xmm0, %edx @@ -313,7 +378,7 @@ L(CopyFrom1To16BytesTail): bsf %edx, %edx BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) - .p2align 4 + .p2align 9 L(CopyFrom1To32Bytes1): add $16, %esi add $16, %edi @@ -336,6 +401,9 @@ L(CopyFrom1To16BytesUnaligned_0): bsf %edx, %edx # ifdef USE_AS_STPCPY lea (%edi, %edx), %eax +# endif +# ifdef __CHKP__ + bndcu 15(%edi), %bnd0 # endif movdqu %xmm4, (%edi) add $63, %ebx @@ -349,6 +417,9 @@ L(CopyFrom1To16BytesUnaligned_16): movdqu %xmm4, (%edi) # ifdef USE_AS_STPCPY lea 16(%edi, %edx), %eax +# endif +# ifdef __CHKP__ + bndcu 31(%edi), %bnd0 # endif movdqu %xmm5, 16(%edi) add $47, %ebx @@ -363,6 +434,9 @@ L(CopyFrom1To16BytesUnaligned_32): movdqu %xmm5, 16(%edi) # ifdef USE_AS_STPCPY lea 32(%edi, %edx), %eax +# endif +# ifdef __CHKP__ + bndcu 47(%edi), %bnd0 # endif movdqu %xmm6, 32(%edi) add $31, %ebx @@ -495,6 +569,9 @@ L(Exit1): .p2align 4 L(Exit2): movw (%esi), %dx +# ifdef __CHKP__ + bndcu 1(%edi), %bnd0 +# endif movw %dx, (%edi) # ifdef USE_AS_STPCPY lea 1(%edi), %eax @@ -507,6 +584,9 @@ L(Exit2): .p2align 4 L(Exit3): movw (%esi), %cx +# ifdef __CHKP__ + bndcu 2(%edi), %bnd0 +# endif movw %cx, (%edi) movb %dh, 2(%edi) # ifdef USE_AS_STPCPY @@ -520,6 +600,9 @@ L(Exit3): .p2align 4 L(Exit4): movl (%esi), %edx +# ifdef __CHKP__ + bndcu 3(%edi), %bnd0 +# endif movl %edx, (%edi) # ifdef USE_AS_STPCPY lea 3(%edi), %eax @@ -532,6 +615,9 @@ L(Exit4): .p2align 4 L(Exit5): movl (%esi), %ecx +# ifdef 
__CHKP__ + bndcu 4(%edi), %bnd0 +# endif movb %dh, 4(%edi) movl %ecx, (%edi) # ifdef USE_AS_STPCPY @@ -546,6 +632,9 @@ L(Exit5): L(Exit6): movl (%esi), %ecx movw 4(%esi), %dx +# ifdef __CHKP__ + bndcu 5(%edi), %bnd0 +# endif movl %ecx, (%edi) movw %dx, 4(%edi) # ifdef USE_AS_STPCPY @@ -560,6 +649,9 @@ L(Exit6): L(Exit7): movl (%esi), %ecx movl 3(%esi), %edx +# ifdef __CHKP__ + bndcu 6(%edi), %bnd0 +# endif movl %ecx, (%edi) movl %edx, 3(%edi) # ifdef USE_AS_STPCPY @@ -573,6 +665,9 @@ L(Exit7): .p2align 4 L(Exit8): movlpd (%esi), %xmm0 +# ifdef __CHKP__ + bndcu 7(%edi), %bnd0 +# endif movlpd %xmm0, (%edi) # ifdef USE_AS_STPCPY lea 7(%edi), %eax @@ -585,6 +680,9 @@ L(Exit8): .p2align 4 L(Exit9): movlpd (%esi), %xmm0 +# ifdef __CHKP__ + bndcu 8(%edi), %bnd0 +# endif movb %dh, 8(%edi) movlpd %xmm0, (%edi) # ifdef USE_AS_STPCPY @@ -599,6 +697,9 @@ L(Exit9): L(Exit10): movlpd (%esi), %xmm0 movw 8(%esi), %dx +# ifdef __CHKP__ + bndcu 9(%edi), %bnd0 +# endif movlpd %xmm0, (%edi) movw %dx, 8(%edi) # ifdef USE_AS_STPCPY @@ -613,6 +714,9 @@ L(Exit10): L(Exit11): movlpd (%esi), %xmm0 movl 7(%esi), %edx +# ifdef __CHKP__ + bndcu 10(%edi), %bnd0 +# endif movlpd %xmm0, (%edi) movl %edx, 7(%edi) # ifdef USE_AS_STPCPY @@ -627,6 +731,9 @@ L(Exit11): L(Exit12): movlpd (%esi), %xmm0 movl 8(%esi), %edx +# ifdef __CHKP__ + bndcu 11(%edi), %bnd0 +# endif movlpd %xmm0, (%edi) movl %edx, 8(%edi) # ifdef USE_AS_STPCPY @@ -641,6 +748,9 @@ L(Exit12): L(Exit13): movlpd (%esi), %xmm0 movlpd 5(%esi), %xmm1 +# ifdef __CHKP__ + bndcu 12(%edi), %bnd0 +# endif movlpd %xmm0, (%edi) movlpd %xmm1, 5(%edi) # ifdef USE_AS_STPCPY @@ -655,6 +765,9 @@ L(Exit13): L(Exit14): movlpd (%esi), %xmm0 movlpd 6(%esi), %xmm1 +# ifdef __CHKP__ + bndcu 13(%edi), %bnd0 +# endif movlpd %xmm0, (%edi) movlpd %xmm1, 6(%edi) # ifdef USE_AS_STPCPY @@ -669,6 +782,9 @@ L(Exit14): L(Exit15): movlpd (%esi), %xmm0 movlpd 7(%esi), %xmm1 +# ifdef __CHKP__ + bndcu 14(%edi), %bnd0 +# endif movlpd %xmm0, (%edi) movlpd %xmm1, 7(%edi) # ifdef USE_AS_STPCPY @@ -682,6 +798,9 @@ L(Exit15): .p2align 4 L(Exit16): movdqu (%esi), %xmm0 +# ifdef __CHKP__ + bndcu 15(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) # ifdef USE_AS_STPCPY lea 15(%edi), %eax @@ -694,6 +813,9 @@ L(Exit16): .p2align 4 L(Exit17): movdqu (%esi), %xmm0 +# ifdef __CHKP__ + bndcu 16(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movb %dh, 16(%edi) # ifdef USE_AS_STPCPY @@ -708,6 +830,9 @@ L(Exit17): L(Exit18): movdqu (%esi), %xmm0 movw 16(%esi), %cx +# ifdef __CHKP__ + bndcu 17(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movw %cx, 16(%edi) # ifdef USE_AS_STPCPY @@ -722,6 +847,9 @@ L(Exit18): L(Exit19): movdqu (%esi), %xmm0 movl 15(%esi), %ecx +# ifdef __CHKP__ + bndcu 18(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movl %ecx, 15(%edi) # ifdef USE_AS_STPCPY @@ -736,6 +864,9 @@ L(Exit19): L(Exit20): movdqu (%esi), %xmm0 movl 16(%esi), %ecx +# ifdef __CHKP__ + bndcu 19(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movl %ecx, 16(%edi) # ifdef USE_AS_STPCPY @@ -750,6 +881,9 @@ L(Exit20): L(Exit21): movdqu (%esi), %xmm0 movl 16(%esi), %ecx +# ifdef __CHKP__ + bndcu 20(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movl %ecx, 16(%edi) movb %dh, 20(%edi) @@ -765,6 +899,9 @@ L(Exit21): L(Exit22): movdqu (%esi), %xmm0 movlpd 14(%esi), %xmm3 +# ifdef __CHKP__ + bndcu 21(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movlpd %xmm3, 14(%edi) # ifdef USE_AS_STPCPY @@ -779,6 +916,9 @@ L(Exit22): L(Exit23): movdqu (%esi), %xmm0 movlpd 15(%esi), %xmm3 +# ifdef __CHKP__ + bndcu 22(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movlpd %xmm3, 
15(%edi) # ifdef USE_AS_STPCPY @@ -793,6 +933,9 @@ L(Exit23): L(Exit24): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 +# ifdef __CHKP__ + bndcu 23(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) # ifdef USE_AS_STPCPY @@ -807,6 +950,9 @@ L(Exit24): L(Exit25): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 +# ifdef __CHKP__ + bndcu 24(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movb %dh, 24(%edi) @@ -823,6 +969,9 @@ L(Exit26): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movw 24(%esi), %cx +# ifdef __CHKP__ + bndcu 25(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movw %cx, 24(%edi) @@ -839,6 +988,9 @@ L(Exit27): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movl 23(%esi), %ecx +# ifdef __CHKP__ + bndcu 26(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movl %ecx, 23(%edi) @@ -855,6 +1007,9 @@ L(Exit28): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movl 24(%esi), %ecx +# ifdef __CHKP__ + bndcu 27(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movl %ecx, 24(%edi) @@ -870,6 +1025,9 @@ L(Exit28): L(Exit29): movdqu (%esi), %xmm0 movdqu 13(%esi), %xmm2 +# ifdef __CHKP__ + bndcu 28(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movdqu %xmm2, 13(%edi) # ifdef USE_AS_STPCPY @@ -884,6 +1042,9 @@ L(Exit29): L(Exit30): movdqu (%esi), %xmm0 movdqu 14(%esi), %xmm2 +# ifdef __CHKP__ + bndcu 29(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movdqu %xmm2, 14(%edi) # ifdef USE_AS_STPCPY @@ -899,6 +1060,9 @@ L(Exit30): L(Exit31): movdqu (%esi), %xmm0 movdqu 15(%esi), %xmm2 +# ifdef __CHKP__ + bndcu 30(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movdqu %xmm2, 15(%edi) # ifdef USE_AS_STPCPY @@ -913,6 +1077,9 @@ L(Exit31): L(Exit32): movdqu (%esi), %xmm0 movdqu 16(%esi), %xmm2 +# ifdef __CHKP__ + bndcu 31(%edi), %bnd0 +# endif movdqu %xmm0, (%edi) movdqu %xmm2, 16(%edi) # ifdef USE_AS_STPCPY @@ -1612,37 +1779,90 @@ ENTRY (STRCPY) ENTRANCE mov STR1(%esp), %edx mov STR2(%esp), %ecx +# ifdef __CHKP__ + bndldx STR1(%esp,%edx,1), %bnd0 + bndldx STR2(%esp,%ecx,1), %bnd1 + bndcl (%ecx), %bnd1 + bndcu (%ecx), %bnd1 + bndcl (%edx), %bnd0 + bndcu (%edx), %bnd0 +# endif cmpb $0, (%ecx) jz L(ExitTail1) +# ifdef __CHKP__ + bndcu 1(%ecx), %bnd1 +# endif cmpb $0, 1(%ecx) jz L(ExitTail2) +# ifdef __CHKP__ + bndcu 2(%ecx), %bnd1 +# endif cmpb $0, 2(%ecx) jz L(ExitTail3) +# ifdef __CHKP__ + bndcu 3(%ecx), %bnd1 +# endif cmpb $0, 3(%ecx) jz L(ExitTail4) +# ifdef __CHKP__ + bndcu 4(%ecx), %bnd1 +# endif cmpb $0, 4(%ecx) jz L(ExitTail5) +# ifdef __CHKP__ + bndcu 5(%ecx), %bnd1 +# endif cmpb $0, 5(%ecx) jz L(ExitTail6) +# ifdef __CHKP__ + bndcu 6(%ecx), %bnd1 +# endif cmpb $0, 6(%ecx) jz L(ExitTail7) +# ifdef __CHKP__ + bndcu 7(%ecx), %bnd1 +# endif cmpb $0, 7(%ecx) jz L(ExitTail8) +# ifdef __CHKP__ + bndcu 8(%ecx), %bnd1 +# endif cmpb $0, 8(%ecx) jz L(ExitTail9) +# ifdef __CHKP__ + bndcu 9(%ecx), %bnd1 +# endif cmpb $0, 9(%ecx) jz L(ExitTail10) +# ifdef __CHKP__ + bndcu 10(%ecx), %bnd1 +# endif cmpb $0, 10(%ecx) jz L(ExitTail11) +# ifdef __CHKP__ + bndcu 11(%ecx), %bnd1 +# endif cmpb $0, 11(%ecx) jz L(ExitTail12) +# ifdef __CHKP__ + bndcu 12(%ecx), %bnd1 +# endif cmpb $0, 12(%ecx) jz L(ExitTail13) +# ifdef __CHKP__ + bndcu 13(%ecx), %bnd1 +# endif cmpb $0, 13(%ecx) jz L(ExitTail14) +# ifdef __CHKP__ + bndcu 14(%ecx), %bnd1 +# endif cmpb $0, 14(%ecx) jz L(ExitTail15) +# ifdef __CHKP__ + bndcu 15(%ecx), %bnd1 +# endif cmpb $0, 15(%ecx) jz L(ExitTail16) @@ -1654,6 +1874,9 @@ ENTRY (STRCPY) and $-16, %ebx pxor %xmm0, %xmm0 movdqu (%ecx), %xmm1 +# ifdef 
__CHKP__ + bndcu 15(%edx), %bnd0 +# endif movdqu %xmm1, (%edx) pcmpeqb (%ebx), %xmm0 pmovmskb %xmm0, %eax @@ -1669,6 +1892,10 @@ ENTRY (STRCPY) xor %ebx, %ebx .p2align 4 +# ifdef __CHKP__ + bndcu 16(%ecx), %bnd1 + bndcu 15(%edx), %bnd0 +# endif movdqa (%ecx), %xmm1 movaps 16(%ecx), %xmm2 movdqu %xmm1, (%edx) @@ -1678,6 +1905,10 @@ ENTRY (STRCPY) test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%ecx, %ebx), %bnd1 + bndcu 15(%edx, %ebx), %bnd0 +# endif movaps 16(%ecx, %ebx), %xmm3 movdqu %xmm2, (%edx, %ebx) pcmpeqb %xmm3, %xmm0 @@ -1686,6 +1917,10 @@ ENTRY (STRCPY) test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%ecx, %ebx), %bnd1 + bndcu 15(%edx, %ebx), %bnd0 +# endif movaps 16(%ecx, %ebx), %xmm4 movdqu %xmm3, (%edx, %ebx) pcmpeqb %xmm4, %xmm0 @@ -1694,6 +1929,10 @@ ENTRY (STRCPY) test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%ecx, %ebx), %bnd1 + bndcu 15(%edx, %ebx), %bnd0 +# endif movaps 16(%ecx, %ebx), %xmm1 movdqu %xmm4, (%edx, %ebx) pcmpeqb %xmm1, %xmm0 @@ -1702,6 +1941,10 @@ ENTRY (STRCPY) test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%ecx, %ebx), %bnd1 + bndcu 15(%edx, %ebx), %bnd0 +# endif movaps 16(%ecx, %ebx), %xmm2 movdqu %xmm1, (%edx, %ebx) pcmpeqb %xmm2, %xmm0 @@ -1710,6 +1953,10 @@ ENTRY (STRCPY) test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 16(%ecx, %ebx), %bnd1 + bndcu 15(%edx, %ebx), %bnd0 +# endif movaps 16(%ecx, %ebx), %xmm3 movdqu %xmm2, (%edx, %ebx) pcmpeqb %xmm3, %xmm0 @@ -1718,6 +1965,9 @@ ENTRY (STRCPY) test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%edx, %ebx), %bnd0 +# endif movdqu %xmm3, (%edx, %ebx) mov %ecx, %eax lea 16(%ecx, %ebx), %ecx @@ -1726,6 +1976,9 @@ ENTRY (STRCPY) sub %eax, %edx L(Aligned64Loop): +# ifdef __CHKP__ + bndcu (%ecx), %bnd1 +# endif movaps (%ecx), %xmm2 movaps %xmm2, %xmm4 movaps 16(%ecx), %xmm5 @@ -1742,6 +1995,10 @@ L(Aligned64Loop): test %eax, %eax jnz L(Aligned64Leave) L(Aligned64Loop_start): +# ifdef __CHKP__ + bndcu (%ecx), %bnd1 + bndcu -1(%edx), %bnd0 +# endif movdqu %xmm4, -64(%edx) movaps (%ecx), %xmm2 movdqa %xmm2, %xmm4 @@ -1771,6 +2028,9 @@ L(Aligned64Leave): pcmpeqb %xmm5, %xmm0 pmovmskb %xmm0, %eax +# ifdef __CHKP__ + bndcu -49(%edx), %bnd0 +# endif movdqu %xmm4, -64(%edx) test %eax, %eax lea 16(%ebx), %ebx @@ -1778,11 +2038,17 @@ L(Aligned64Leave): pcmpeqb %xmm6, %xmm0 pmovmskb %xmm0, %eax +# ifdef __CHKP__ + bndcu -33(%edx), %bnd0 +# endif movdqu %xmm5, -48(%edx) test %eax, %eax lea 16(%ebx), %ebx jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu -17(%edx), %bnd0 +# endif movdqu %xmm6, -32(%edx) pcmpeqb %xmm7, %xmm0 pmovmskb %xmm0, %eax @@ -1813,6 +2079,10 @@ L(CopyFrom1To16Bytes): test $0x40, %al jnz L(Exit7) /* Exit 8 */ +# ifdef __CHKP__ + bndcu 7(%edx), %bnd0 + bndcu 7(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movl 4(%ecx), %eax @@ -1841,6 +2111,10 @@ L(ExitHigh): test $0x40, %ah jnz L(Exit15) /* Exit 16 */ +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 15(%ecx), %bnd1 +# endif movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) movlpd 8(%ecx), %xmm0 @@ -1854,6 +2128,10 @@ L(ExitHigh): .p2align 4 L(Exit1): +# ifdef __CHKP__ + bndcu (%edx), %bnd0 + bndcu (%ecx), %bnd1 +# endif movb (%ecx), %al movb %al, (%edx) # ifdef USE_AS_STPCPY @@ -1865,6 +2143,10 @@ L(Exit1): .p2align 4 L(Exit2): +# ifdef __CHKP__ + bndcu 1(%edx), %bnd0 + bndcu 1(%ecx), %bnd1 +# endif movw (%ecx), %ax movw %ax, (%edx) # ifdef USE_AS_STPCPY @@ -1876,6 +2158,10 @@ L(Exit2): .p2align 4 L(Exit3): +# 
ifdef __CHKP__ + bndcu 2(%edx), %bnd0 + bndcu 2(%ecx), %bnd1 +# endif movw (%ecx), %ax movw %ax, (%edx) movb 2(%ecx), %al @@ -1889,6 +2175,10 @@ L(Exit3): .p2align 4 L(Exit4): +# ifdef __CHKP__ + bndcu 3(%edx), %bnd0 + bndcu 3(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) # ifdef USE_AS_STPCPY @@ -1900,6 +2190,10 @@ L(Exit4): .p2align 4 L(Exit5): +# ifdef __CHKP__ + bndcu 4(%edx), %bnd0 + bndcu 4(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movb 4(%ecx), %al @@ -1913,6 +2207,10 @@ L(Exit5): .p2align 4 L(Exit6): +# ifdef __CHKP__ + bndcu 5(%edx), %bnd0 + bndcu 5(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movw 4(%ecx), %ax @@ -1926,6 +2224,10 @@ L(Exit6): .p2align 4 L(Exit7): +# ifdef __CHKP__ + bndcu 6(%edx), %bnd0 + bndcu 6(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movl 3(%ecx), %eax @@ -1939,6 +2241,10 @@ L(Exit7): .p2align 4 L(Exit9): +# ifdef __CHKP__ + bndcu 8(%edx), %bnd0 + bndcu 8(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movl 4(%ecx), %eax @@ -1954,6 +2260,10 @@ L(Exit9): .p2align 4 L(Exit10): +# ifdef __CHKP__ + bndcu 9(%edx), %bnd0 + bndcu 9(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movl 4(%ecx), %eax @@ -1969,6 +2279,10 @@ L(Exit10): .p2align 4 L(Exit11): +# ifdef __CHKP__ + bndcu 10(%edx), %bnd0 + bndcu 10(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movl 4(%ecx), %eax @@ -1984,6 +2298,10 @@ L(Exit11): .p2align 4 L(Exit12): +# ifdef __CHKP__ + bndcu 11(%edx), %bnd0 + bndcu 11(%ecx), %bnd1 +# endif movl (%ecx), %eax movl %eax, (%edx) movl 4(%ecx), %eax @@ -1999,6 +2317,10 @@ L(Exit12): .p2align 4 L(Exit13): +# ifdef __CHKP__ + bndcu 12(%edx), %bnd0 + bndcu 12(%ecx), %bnd1 +# endif movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) movlpd 5(%ecx), %xmm0 @@ -2012,6 +2334,10 @@ L(Exit13): .p2align 4 L(Exit14): +# ifdef __CHKP__ + bndcu 13(%edx), %bnd0 + bndcu 13(%ecx), %bnd1 +# endif movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) movlpd 6(%ecx), %xmm0 @@ -2025,6 +2351,10 @@ L(Exit14): .p2align 4 L(Exit15): +# ifdef __CHKP__ + bndcu 14(%edx), %bnd0 + bndcu 14(%ecx), %bnd1 +# endif movlpd (%ecx), %xmm0 movlpd %xmm0, (%edx) movlpd 7(%ecx), %xmm0 @@ -2040,6 +2370,9 @@ CFI_POP (%edi) .p2align 4 L(ExitTail1): +# ifdef __CHKP__ + bndcu (%edx), %bnd0 +# endif movb (%ecx), %al movb %al, (%edx) movl %edx, %eax @@ -2048,6 +2381,9 @@ L(ExitTail1): .p2align 4 L(ExitTail2): movw (%ecx), %ax +# ifdef __CHKP__ + bndcu 1(%edx), %bnd0 +# endif movw %ax, (%edx) # ifdef USE_AS_STPCPY lea 1(%edx), %eax @@ -2059,6 +2395,9 @@ L(ExitTail2): .p2align 4 L(ExitTail3): movw (%ecx), %ax +# ifdef __CHKP__ + bndcu 2(%edx), %bnd0 +# endif movw %ax, (%edx) movb 2(%ecx), %al movb %al, 2(%edx) @@ -2072,6 +2411,9 @@ L(ExitTail3): .p2align 4 L(ExitTail4): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 3(%edx), %bnd0 +# endif movl %eax, (%edx) # ifdef USE_AS_STPCPY lea 3(%edx), %eax @@ -2083,6 +2425,9 @@ L(ExitTail4): .p2align 4 L(ExitTail5): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 4(%edx), %bnd0 +# endif movl %eax, (%edx) movb 4(%ecx), %al movb %al, 4(%edx) @@ -2096,6 +2441,9 @@ L(ExitTail5): .p2align 4 L(ExitTail6): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 5(%edx), %bnd0 +# endif movl %eax, (%edx) movw 4(%ecx), %ax movw %ax, 4(%edx) @@ -2109,6 +2457,9 @@ L(ExitTail6): .p2align 4 L(ExitTail7): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 6(%edx), %bnd0 +# endif movl %eax, (%edx) movl 3(%ecx), %eax movl %eax, 3(%edx) @@ -2122,6 +2473,9 @@ L(ExitTail7): .p2align 4 L(ExitTail8): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 7(%edx), 
%bnd0 +# endif movl %eax, (%edx) movl 4(%ecx), %eax movl %eax, 4(%edx) @@ -2135,6 +2489,9 @@ L(ExitTail8): .p2align 4 L(ExitTail9): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 8(%edx), %bnd0 +# endif movl %eax, (%edx) movl 4(%ecx), %eax movl %eax, 4(%edx) @@ -2150,6 +2507,9 @@ L(ExitTail9): .p2align 4 L(ExitTail10): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 9(%edx), %bnd0 +# endif movl %eax, (%edx) movl 4(%ecx), %eax movl %eax, 4(%edx) @@ -2165,6 +2525,9 @@ L(ExitTail10): .p2align 4 L(ExitTail11): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 10(%edx), %bnd0 +# endif movl %eax, (%edx) movl 4(%ecx), %eax movl %eax, 4(%edx) @@ -2180,6 +2543,9 @@ L(ExitTail11): .p2align 4 L(ExitTail12): movl (%ecx), %eax +# ifdef __CHKP__ + bndcu 11(%edx), %bnd0 +# endif movl %eax, (%edx) movl 4(%ecx), %eax movl %eax, 4(%edx) @@ -2195,6 +2561,9 @@ L(ExitTail12): .p2align 4 L(ExitTail13): movlpd (%ecx), %xmm0 +# ifdef __CHKP__ + bndcu 12(%edx), %bnd0 +# endif movlpd %xmm0, (%edx) movlpd 5(%ecx), %xmm0 movlpd %xmm0, 5(%edx) @@ -2208,6 +2577,9 @@ L(ExitTail13): .p2align 4 L(ExitTail14): movlpd (%ecx), %xmm0 +# ifdef __CHKP__ + bndcu 13(%edx), %bnd0 +# endif movlpd %xmm0, (%edx) movlpd 6(%ecx), %xmm0 movlpd %xmm0, 6(%edx) @@ -2221,6 +2593,9 @@ L(ExitTail14): .p2align 4 L(ExitTail15): movlpd (%ecx), %xmm0 +# ifdef __CHKP__ + bndcu 14(%edx), %bnd0 +# endif movlpd %xmm0, (%edx) movlpd 7(%ecx), %xmm0 movlpd %xmm0, 7(%edx) @@ -2234,6 +2609,9 @@ L(ExitTail15): .p2align 4 L(ExitTail16): movlpd (%ecx), %xmm0 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 +# endif movlpd %xmm0, (%edx) movlpd 8(%ecx), %xmm0 movlpd %xmm0, 8(%edx) diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S index 32db65cbd2..ab537c1f41 100644 --- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S +++ b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S @@ -41,6 +41,11 @@ ENTRY ( __strlen_sse2_bsf) ENTRANCE mov STR(%esp), %edi +#ifdef __CHKP__ + bndldx STR(%esp,%edi,1), %bnd0 + bndcl (%edi),%bnd0 + bndcu (%edi),%bnd0 +#endif xor %eax, %eax mov %edi, %ecx and $0x3f, %ecx @@ -73,21 +78,33 @@ L(align16_start): pxor %xmm3, %xmm3 .p2align 4 L(align16_loop): +#ifdef __CHKP__ + bndcu 16(%eax), %bnd0 +#endif pcmpeqb 16(%eax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx jnz L(exit16) +#ifdef __CHKP__ + bndcu 32(%eax), %bnd0 +#endif pcmpeqb 32(%eax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx jnz L(exit32) +#ifdef __CHKP__ + bndcu 48(%eax), %bnd0 +#endif pcmpeqb 48(%eax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx jnz L(exit48) +#ifdef __CHKP__ + bndcu 64(%eax), %bnd0 +#endif pcmpeqb 64(%eax), %xmm3 pmovmskb %xmm3, %edx lea 64(%eax), %eax @@ -98,24 +115,36 @@ L(exit): L(exit_less16): bsf %edx, %edx add %edx, %eax +#ifdef __CHKP__ + bndcu -1(%edi, %eax), %bnd0 +#endif RETURN L(exit16): sub %edi, %eax bsf %edx, %edx add %edx, %eax add $16, %eax +#ifdef __CHKP__ + bndcu -1(%edi, %eax), %bnd0 +#endif RETURN L(exit32): sub %edi, %eax bsf %edx, %edx add %edx, %eax add $32, %eax +#ifdef __CHKP__ + bndcu -1(%edi, %eax), %bnd0 +#endif RETURN L(exit48): sub %edi, %eax bsf %edx, %edx add %edx, %eax add $48, %eax +#ifdef __CHKP__ + bndcu -1(%edi, %eax), %bnd0 +#endif POP (%edi) POP (%esi) ret diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S index a4f2806cfe..3d0743ec88 100644 --- a/sysdeps/i386/i686/multiarch/strlen-sse2.S +++ b/sysdeps/i386/i686/multiarch/strlen-sse2.S @@ -41,7 +41,10 @@ # define PUSH(REG) pushl REG; CFI_PUSH (REG) # define POP(REG) popl REG; CFI_POP (REG) # undef 
RETURN -# define RETURN POP (%edi); CFI_PUSH(%edi); ret +# define RETURN \ + mov STR+4(%esp),%edx; \ + bndcu -1(%edx,%eax), %bnd0; \ + POP (%edi); CFI_PUSH(%edi); ret # endif # ifndef STRLEN @@ -51,12 +54,19 @@ atom_text_section ENTRY (STRLEN) mov STR(%esp), %edx +# ifdef __CHKP__ + bndldx STR(%esp,%edx,1), %bnd0 +# endif # ifdef USE_AS_STRNLEN PUSH (%edi) movl LEN(%esp), %edi sub $4, %edi jbe L(len_less4_prolog) # endif +# ifdef __CHKP__ + bndcl (%edx),%bnd0 + bndcu (%edx),%bnd0 +# endif # endif xor %eax, %eax cmpb $0, (%edx) @@ -122,6 +132,9 @@ ENTRY (STRLEN) jbe L(len_less64) # endif +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm0 pmovmskb %xmm0, %edx pxor %xmm1, %xmm1 @@ -129,6 +142,9 @@ ENTRY (STRLEN) lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm1 pmovmskb %xmm1, %edx pxor %xmm2, %xmm2 @@ -136,6 +152,9 @@ ENTRY (STRLEN) lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm2 pmovmskb %xmm2, %edx pxor %xmm3, %xmm3 @@ -143,6 +162,9 @@ ENTRY (STRLEN) lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx @@ -154,24 +176,36 @@ ENTRY (STRLEN) jbe L(len_less64) # endif +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx @@ -183,24 +217,36 @@ ENTRY (STRLEN) jbe L(len_less64) # endif +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx @@ -212,24 +258,36 @@ ENTRY (STRLEN) jbe L(len_less64) # endif +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm0 pmovmskb %xmm0, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm1 pmovmskb %xmm1, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm2 pmovmskb %xmm2, %edx test %edx, %edx lea 16(%eax), %eax jnz L(exit) +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif pcmpeqb (%eax), %xmm3 pmovmskb %xmm3, %edx test %edx, %edx @@ -250,6 +308,9 @@ L(aligned_64_loop): sub $64, %edi jbe L(len_less64) # endif +#ifdef __CHKP__ + bndcu (%eax), %bnd0 +#endif movaps (%eax), %xmm0 movaps 16(%eax), %xmm1 movaps 32(%eax), %xmm2 @@ -535,6 +596,10 @@ L(len_less4_prolog): add $4, %edi jz L(exit_tail0) +# ifdef __CHKP__ + bndcl (%edx),%bnd0 + bndcu (%edx),%bnd0 +# endif cmpb $0, (%edx) jz L(exit_tail0) cmp $1, %edi diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S index e026c40683..1c907a46de 100644 --- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S +++ 
b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S @@ -42,6 +42,12 @@ ENTRY (__strrchr_sse2_bsf) mov STR1(%esp), %ecx movd STR2(%esp), %xmm1 +# ifdef __CHKP__ + bndldx STR1(%esp,%ecx,1), %bnd0 + bndcl (%ecx), %bnd0 + bndcu (%ecx), %bnd0 +# endif + PUSH (%edi) pxor %xmm2, %xmm2 mov %ecx, %edi @@ -90,6 +96,9 @@ L(unaligned_return_value1): jz L(return_null) bsr %eax, %eax add %edi, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif POP (%edi) ret CFI_PUSH (%edi) @@ -156,6 +165,9 @@ L(unaligned_return_value): jz L(return_null) bsr %eax, %eax add %edi, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif POP (%edi) ret CFI_PUSH (%edi) @@ -175,6 +187,9 @@ L(unaligned_match): /* Loop start on aligned string. */ .p2align 4 L(loop): +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -184,6 +199,9 @@ L(loop): or %eax, %ecx jnz L(matches) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -193,6 +211,9 @@ L(loop): or %eax, %ecx jnz L(matches) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -202,6 +223,9 @@ L(loop): or %eax, %ecx jnz L(matches) +# ifdef __CHKP__ + bndcu (%edi), %bnd0 +# endif movdqa (%edi), %xmm0 pcmpeqb %xmm0, %xmm2 add $16, %edi @@ -224,6 +248,9 @@ L(return_value): POP (%esi) sub $16, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif POP (%edi) ret @@ -255,6 +282,9 @@ L(return_value_1): bsr %eax, %eax add %edi, %eax sub $16, %eax +# ifdef __CHKP__ + bndcu (%eax), %bnd0 +# endif POP (%edi) ret diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S index 63101d9c9f..e06274aecf 100644 --- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S +++ b/sysdeps/i386/i686/multiarch/wcschr-sse2.S @@ -40,7 +40,11 @@ ENTRY (__wcschr_sse2) mov STR1(%esp), %ecx movd STR2(%esp), %xmm1 - +# ifdef __CHKP__ + bndldx STR1(%esp,%ecx,1), %bnd0 + bndcl (%ecx),%bnd0 + bndcu (%ecx),%bnd0 +# endif mov %ecx, %eax punpckldq %xmm1, %xmm1 pxor %xmm2, %xmm2 @@ -90,6 +94,9 @@ L(cross_cache): test $15, %dl jnz L(return_null) lea 4(%ecx), %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret CFI_PUSH (%edi) @@ -108,6 +115,9 @@ L(unaligned_no_match): .p2align 4 L(loop): add $16, %ecx +# ifdef __CHKP__ + bndcu (%ecx),%bnd0 +# endif movdqa (%ecx), %xmm0 pcmpeqd %xmm0, %xmm2 pcmpeqd %xmm1, %xmm0 @@ -117,6 +127,9 @@ L(loop): jnz L(matches) add $16, %ecx +# ifdef __CHKP__ + bndcu (%ecx),%bnd0 +# endif movdqa (%ecx), %xmm0 pcmpeqd %xmm0, %xmm2 pcmpeqd %xmm1, %xmm0 @@ -126,6 +139,9 @@ L(loop): jnz L(matches) add $16, %ecx +# ifdef __CHKP__ + bndcu (%ecx),%bnd0 +# endif movdqa (%ecx), %xmm0 pcmpeqd %xmm0, %xmm2 pcmpeqd %xmm1, %xmm0 @@ -135,6 +151,9 @@ L(loop): jnz L(matches) add $16, %ecx +# ifdef __CHKP__ + bndcu (%ecx),%bnd0 +# endif movdqa (%ecx), %xmm0 pcmpeqd %xmm0, %xmm2 pcmpeqd %xmm1, %xmm0 @@ -160,11 +179,17 @@ L(match_case2): test $15, %dl jnz L(return_null) lea 4(%ecx), %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret .p2align 4 L(match_case2_4): mov %ecx, %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret .p2align 4 @@ -176,11 +201,17 @@ L(match_higth_case2): test $15, %dh jnz L(return_null) lea 12(%ecx), %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret .p2align 4 L(match_case2_12): lea 8(%ecx), %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret .p2align 4 @@ -191,6 +222,9 @@ L(match_case1): test $0x01, %al jnz L(exit0) lea 4(%ecx), %eax +# ifdef __CHKP__ + bndcu 
(%eax),%bnd0 +# endif ret .p2align 4 @@ -198,16 +232,25 @@ L(match_higth_case1): test $0x01, %ah jnz L(exit3) lea 12(%ecx), %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret .p2align 4 L(exit0): mov %ecx, %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret .p2align 4 L(exit3): lea 8(%ecx), %eax +# ifdef __CHKP__ + bndcu (%eax),%bnd0 +# endif ret .p2align 4 diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S index 9b248c1073..108e7fb914 100644 --- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S +++ b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S @@ -47,6 +47,14 @@ ENTRY (__wcscmp_sse2) */ mov STR1(%esp), %edx mov STR2(%esp), %eax +#ifdef __CHKP__ + bndldx STR1(%esp,%edx,1), %bnd0 + bndldx STR2(%esp,%eax,1), %bnd1 + bndcl (%edx), %bnd0 + bndcl (%eax), %bnd1 + bndcu (%edx), %bnd0 + bndcu (%eax), %bnd1 +#endif mov (%eax), %ecx cmp %ecx, (%edx) diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S index 47fb5164b3..708ef41040 100644 --- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S +++ b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S @@ -41,13 +41,29 @@ ENTRY (__wcscpy_ssse3) mov STR1(%esp), %edx mov STR2(%esp), %ecx +# ifdef __CHKP__ + bndldx STR1(%esp,%edx,1), %bnd0 + bndldx STR2(%esp,%ecx,1), %bnd1 + bndcl (%edx), %bnd0 + bndcl (%ecx), %bnd1 + bndcu (%ecx), %bnd1 +# endif cmp $0, (%ecx) jz L(ExitTail4) +# ifdef __CHKP__ + bndcu 4(%ecx), %bnd1 +# endif cmp $0, 4(%ecx) jz L(ExitTail8) +# ifdef __CHKP__ + bndcu 8(%ecx), %bnd1 +# endif cmp $0, 8(%ecx) jz L(ExitTail12) +# ifdef __CHKP__ + bndcu 12(%ecx), %bnd1 +# endif cmp $0, 12(%ecx) jz L(ExitTail16) @@ -61,6 +77,9 @@ ENTRY (__wcscpy_ssse3) pxor %xmm0, %xmm0 pcmpeqd (%esi), %xmm0 movdqu (%ecx), %xmm1 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 +# endif movdqu %xmm1, (%edx) pmovmskb %xmm0, %eax @@ -87,6 +106,10 @@ ENTRY (__wcscpy_ssse3) jmp L(Shl12) L(Align16Both): +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 16(%ecx), %bnd1 +# endif movaps (%ecx), %xmm1 movaps 16(%ecx), %xmm2 movaps %xmm1, (%edx) @@ -97,6 +120,10 @@ L(Align16Both): test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%edx, %esi), %bnd0 + bndcu 16(%ecx, %esi), %bnd1 +# endif movaps 16(%ecx, %esi), %xmm3 movaps %xmm2, (%edx, %esi) pcmpeqd %xmm3, %xmm0 @@ -106,6 +133,10 @@ L(Align16Both): test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%edx, %esi), %bnd0 + bndcu 16(%ecx, %esi), %bnd1 +# endif movaps 16(%ecx, %esi), %xmm4 movaps %xmm3, (%edx, %esi) pcmpeqd %xmm4, %xmm0 @@ -115,6 +146,10 @@ L(Align16Both): test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%edx, %esi), %bnd0 + bndcu 16(%ecx, %esi), %bnd1 +# endif movaps 16(%ecx, %esi), %xmm1 movaps %xmm4, (%edx, %esi) pcmpeqd %xmm1, %xmm0 @@ -124,6 +159,10 @@ L(Align16Both): test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%edx, %esi), %bnd0 + bndcu 16(%ecx, %esi), %bnd1 +# endif movaps 16(%ecx, %esi), %xmm2 movaps %xmm1, (%edx, %esi) pcmpeqd %xmm2, %xmm0 @@ -133,6 +172,10 @@ L(Align16Both): test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%edx, %esi), %bnd0 + bndcu 16(%ecx, %esi), %bnd1 +# endif movaps 16(%ecx, %esi), %xmm3 movaps %xmm2, (%edx, %esi) pcmpeqd %xmm3, %xmm0 @@ -142,6 +185,9 @@ L(Align16Both): test %eax, %eax jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu 15(%edx, %esi), %bnd0 +# endif movaps %xmm3, (%edx, %esi) mov %ecx, %eax lea 16(%ecx, %esi), %ecx @@ -152,6 +198,9 @@ L(Align16Both): mov $-0x40, %esi 
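/* Editor's annotation, not part of the patch: in the aligned 64-byte
   loop that follows, the bounds checks are hoisted to one per block
   rather than one per access.  Sketch of the pattern, with %ecx/%bnd1
   the source and %edx/%bnd0 the destination (which points just past
   the store block, so -1(%edx) is its highest byte):

	L(Aligned64Loop):
	# ifdef __CHKP__
	bndcu	(%ecx), %bnd1		# first source byte of the 64-byte block
	# endif
	movaps	(%ecx), %xmm2		# ... four 16-byte loads ...
	# ifdef __CHKP__
	bndcu	-1(%edx), %bnd0		# highest byte of the 64-byte store block
	# endif
	movaps	%xmm4, -64(%edx)	# ... four 16-byte stores ...

   Checking -1(%edx) covers the whole store block against the upper
   bound, while the single load check only proves the block's first
   byte is in range; overruns ending inside a block are presumably
   left to the next iteration's check or to the exit paths.  */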
L(Aligned64Loop): +# ifdef __CHKP__ + bndcu (%ecx), %bnd1 +# endif movaps (%ecx), %xmm2 movaps 32(%ecx), %xmm3 movaps %xmm2, %xmm4 @@ -168,6 +217,9 @@ L(Aligned64Loop): test %eax, %eax jnz L(Aligned64Leave) +# ifdef __CHKP__ + bndcu -1(%edx), %bnd0 +# endif movaps %xmm4, -64(%edx) movaps %xmm5, -48(%edx) movaps %xmm6, -32(%edx) @@ -182,6 +234,9 @@ L(Aligned64Leave): pcmpeqd %xmm5, %xmm0 pmovmskb %xmm0, %eax +# ifdef __CHKP__ + bndcu -49(%edx), %bnd0 +# endif movaps %xmm4, -64(%edx) test %eax, %eax lea 16(%esi), %esi @@ -189,11 +244,17 @@ L(Aligned64Leave): pcmpeqd %xmm6, %xmm0 pmovmskb %xmm0, %eax +# ifdef __CHKP__ + bndcu -33(%edx), %bnd0 +# endif movaps %xmm5, -48(%edx) test %eax, %eax lea 16(%esi), %esi jnz L(CopyFrom1To16Bytes) +# ifdef __CHKP__ + bndcu -17(%edx), %bnd0 +# endif movaps %xmm6, -32(%edx) pcmpeqd %xmm7, %xmm0 pmovmskb %xmm0, %eax @@ -202,11 +263,17 @@ L(Aligned64Leave): jnz L(CopyFrom1To16Bytes) mov $-0x40, %esi +# ifdef __CHKP__ + bndcu -1(%edx), %bnd0 +# endif movaps %xmm7, -16(%edx) jmp L(Aligned64Loop) .p2align 4 L(Shl4): +# ifdef __CHKP__ + bndcu 12(%ecx), %bnd1 +# endif movaps -4(%ecx), %xmm1 movaps 12(%ecx), %xmm2 L(Shl4Start): @@ -218,6 +285,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 28(%ecx), %bnd1 +# endif movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 @@ -231,6 +302,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 28(%ecx), %bnd1 +# endif movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 @@ -244,6 +319,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 28(%ecx), %bnd1 +# endif movaps %xmm2, (%edx) movaps 28(%ecx), %xmm2 @@ -256,6 +335,10 @@ L(Shl4Start): jnz L(Shl4LoopExit) palignr $4, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 28(%ecx), %bnd1 +# endif movaps %xmm2, (%edx) lea 28(%ecx), %ecx lea 16(%edx), %edx @@ -269,6 +352,9 @@ L(Shl4Start): movaps -4(%ecx), %xmm1 L(Shl4LoopStart): +# ifdef __CHKP__ + bndcu 12(%ecx), %bnd1 +# endif movaps 12(%ecx), %xmm2 movaps 28(%ecx), %xmm3 movaps %xmm3, %xmm6 @@ -290,6 +376,9 @@ L(Shl4LoopStart): lea 64(%ecx), %ecx palignr $4, %xmm1, %xmm2 movaps %xmm7, %xmm1 +# ifdef __CHKP__ + bndcu 63(%edx), %bnd0 +# endif movaps %xmm5, 48(%edx) movaps %xmm4, 32(%edx) movaps %xmm3, 16(%edx) @@ -300,6 +389,10 @@ L(Shl4LoopStart): L(Shl4LoopExit): movlpd (%ecx), %xmm0 movl 8(%ecx), %esi +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 +# endif + movaps %xmm2, (%edx) movlpd %xmm0, (%edx) movl %esi, 8(%edx) POP (%esi) @@ -310,6 +403,9 @@ L(Shl4LoopExit): test $0x01, %al jnz L(Exit4) movlpd (%ecx), %xmm0 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 +# endif movlpd %xmm0, (%edx) movl %edi, %eax RETURN @@ -318,6 +414,9 @@ L(Shl4LoopExit): .p2align 4 L(Shl8): +# ifdef __CHKP__ + bndcu 8(%ecx), %bnd1 +# endif movaps -8(%ecx), %xmm1 movaps 8(%ecx), %xmm2 L(Shl8Start): @@ -329,6 +428,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 24(%ecx), %bnd1 +# endif movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 @@ -342,6 +445,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm3, %xmm2 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 24(%ecx), %bnd1 +# endif movaps %xmm2, (%edx) movaps 24(%ecx), %xmm2 @@ -355,6 +462,10 @@ L(Shl8Start): jnz L(Shl8LoopExit) palignr $8, %xmm1, %xmm2 +# ifdef __CHKP__ + bndcu 15(%edx), %bnd0 + bndcu 24(%ecx), %bnd1 +# endif movaps %xmm2, (%edx) movaps 
 	movaps	24(%ecx), %xmm2
@@ -367,6 +477,9 @@ L(Shl8Start):
 	jnz	L(Shl8LoopExit)

 	palignr	$8, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu	15(%edx), %bnd0
+# endif
 	movaps	%xmm2, (%edx)
 	lea	24(%ecx), %ecx
 	lea	16(%edx), %edx
@@ -380,6 +493,9 @@ L(Shl8Start):
 	movaps	-8(%ecx), %xmm1

 L(Shl8LoopStart):
+# ifdef __CHKP__
+	bndcu	8(%ecx), %bnd1
+# endif
 	movaps	8(%ecx), %xmm2
 	movaps	24(%ecx), %xmm3
 	movaps	%xmm3, %xmm6
@@ -401,6 +517,9 @@ L(Shl8LoopStart):
 	lea	64(%ecx), %ecx
 	palignr	$8, %xmm1, %xmm2
 	movaps	%xmm7, %xmm1
+# ifdef __CHKP__
+	bndcu	63(%edx), %bnd0
+# endif
 	movaps	%xmm5, 48(%edx)
 	movaps	%xmm4, 32(%edx)
 	movaps	%xmm3, 16(%edx)
@@ -410,6 +529,9 @@
 L(Shl8LoopExit):
 	movlpd	(%ecx), %xmm0
+# ifdef __CHKP__
+	bndcu	7(%edx), %bnd0
+# endif
 	movlpd	%xmm0, (%edx)
 	POP	(%esi)
 	add	$8, %edx
@@ -419,6 +541,9 @@ L(Shl8LoopExit):
 	test	$0x01, %al
 	jnz	L(Exit4)
 	movlpd	(%ecx), %xmm0
+# ifdef __CHKP__
+	bndcu	7(%edx), %bnd0
+# endif
 	movlpd	%xmm0, (%edx)
 	movl	%edi, %eax
 	RETURN

@@ -427,6 +552,9 @@ L(Shl8LoopExit):
 	.p2align 4
 L(Shl12):
+# ifdef __CHKP__
+	bndcu	4(%ecx), %bnd1
+# endif
 	movaps	-12(%ecx), %xmm1
 	movaps	4(%ecx), %xmm2
 L(Shl12Start):
@@ -438,6 +566,10 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)

 	palignr	$12, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu	15(%edx), %bnd0
+	bndcu	20(%ecx), %bnd1
+# endif
 	movaps	%xmm2, (%edx)
 	movaps	20(%ecx), %xmm2
@@ -451,6 +583,10 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)

 	palignr	$12, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu	15(%edx), %bnd0
+	bndcu	20(%ecx), %bnd1
+# endif
 	movaps	%xmm2, (%edx)
 	movaps	20(%ecx), %xmm2
@@ -464,6 +600,10 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)

 	palignr	$12, %xmm1, %xmm2
+# ifdef __CHKP__
+	bndcu	15(%edx), %bnd0
+	bndcu	20(%ecx), %bnd1
+# endif
 	movaps	%xmm2, (%edx)
 	movaps	20(%ecx), %xmm2
@@ -476,6 +616,9 @@ L(Shl12Start):
 	jnz	L(Shl12LoopExit)

 	palignr	$12, %xmm3, %xmm2
+# ifdef __CHKP__
+	bndcu	15(%edx), %bnd0
+# endif
 	movaps	%xmm2, (%edx)
 	lea	20(%ecx), %ecx
 	lea	16(%edx), %edx
@@ -489,6 +632,9 @@ L(Shl12Start):
 	movaps	-12(%ecx), %xmm1

 L(Shl12LoopStart):
+# ifdef __CHKP__
+	bndcu	4(%ecx), %bnd1
+# endif
 	movaps	4(%ecx), %xmm2
 	movaps	20(%ecx), %xmm3
 	movaps	%xmm3, %xmm6
@@ -510,6 +656,9 @@ L(Shl12LoopStart):
 	lea	64(%ecx), %ecx
 	palignr	$12, %xmm1, %xmm2
 	movaps	%xmm7, %xmm1
+# ifdef __CHKP__
+	bndcu	63(%edx), %bnd0
+# endif
 	movaps	%xmm5, 48(%edx)
 	movaps	%xmm4, 32(%edx)
 	movaps	%xmm3, 16(%edx)
@@ -519,6 +668,9 @@
 L(Shl12LoopExit):
 	movl	(%ecx), %esi
+# ifdef __CHKP__
+	bndcu	3(%edx), %bnd0
+# endif
 	movl	%esi, (%edx)

 	mov	$4, %esi
@@ -533,6 +685,10 @@ L(CopyFrom1To16Bytes):
 	test	$0x01, %al
 	jnz	L(Exit4)
 L(Exit8):
+# ifdef __CHKP__
+	bndcu	7(%edx), %bnd0
+	bndcu	7(%ecx), %bnd1
+# endif
 	movlpd	(%ecx), %xmm0
 	movlpd	%xmm0, (%edx)
 	movl	%edi, %eax
@@ -543,6 +699,10 @@ L(ExitHigh):
 	test	$0x01, %ah
 	jnz	L(Exit12)
 L(Exit16):
+# ifdef __CHKP__
+	bndcu	15(%edx), %bnd0
+	bndcu	15(%ecx), %bnd1
+# endif
 	movdqu	(%ecx), %xmm0
 	movdqu	%xmm0, (%edx)
 	movl	%edi, %eax
@@ -550,6 +710,10 @@ L(Exit16):

 	.p2align 4
 L(Exit4):
+# ifdef __CHKP__
+	bndcu	3(%edx), %bnd0
+	bndcu	3(%ecx), %bnd1
+# endif
 	movl	(%ecx), %eax
 	movl	%eax, (%edx)
 	movl	%edi, %eax
@@ -557,6 +721,10 @@ L(Exit4):

 	.p2align 4
 L(Exit12):
+# ifdef __CHKP__
+	bndcu	11(%edx), %bnd0
+	bndcu	11(%ecx), %bnd1
+# endif
 	movlpd	(%ecx), %xmm0
 	movlpd	%xmm0, (%edx)
 	movl	8(%ecx), %eax
@@ -569,6 +737,9 @@ CFI_POP (%edi)
 	.p2align 4
 L(ExitTail4):
 	movl	(%ecx), %eax
+# ifdef __CHKP__
+	bndcu	3(%edx), %bnd0
+# endif
 	movl	%eax, (%edx)
 	movl	%edx, %eax
 	ret
@@ -576,6 +747,9 @@ L(ExitTail4):
 	.p2align 4
 L(ExitTail8):
 	movlpd	(%ecx), %xmm0
+# ifdef __CHKP__
+	bndcu	7(%edx), %bnd0
+# endif
 	movlpd	%xmm0, (%edx)
 	movl	%edx, %eax
 	ret
@@ -583,6 +757,9 @@ L(ExitTail8):
 	.p2align 4
 L(ExitTail12):
 	movlpd	(%ecx), %xmm0
+# ifdef __CHKP__
+	bndcu	11(%edx), %bnd0
+# endif
 	movlpd	%xmm0, (%edx)
 	movl	8(%ecx), %eax
 	movl	%eax, 8(%edx)
@@ -592,6 +769,9 @@ L(ExitTail12):
 	.p2align 4
 L(ExitTail16):
 	movdqu	(%ecx), %xmm0
+# ifdef __CHKP__
+	bndcu	15(%edx), %bnd0
+# endif
 	movdqu	%xmm0, (%edx)
 	movl	%edx, %eax
 	ret
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
index a92b92ffd3..9c53149261 100644
--- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
@@ -24,21 +24,47 @@
 	.text
 ENTRY (__wcslen_sse2)
 	mov	STR(%esp), %edx
+# ifdef __CHKP__
+	bndldx	STR(%esp,%edx,1), %bnd0
+	bndcl	(%edx),%bnd0
+	bndcu	(%edx),%bnd0
+# endif
 	cmp	$0, (%edx)
 	jz	L(exit_tail0)
+# ifdef __CHKP__
+	bndcu	4(%edx), %bnd0
+# endif
 	cmp	$0, 4(%edx)
 	jz	L(exit_tail1)
+# ifdef __CHKP__
+	bndcu	8(%edx), %bnd0
+# endif
 	cmp	$0, 8(%edx)
 	jz	L(exit_tail2)
+# ifdef __CHKP__
+	bndcu	12(%edx), %bnd0
+# endif
 	cmp	$0, 12(%edx)
 	jz	L(exit_tail3)
+# ifdef __CHKP__
+	bndcu	16(%edx), %bnd0
+# endif
 	cmp	$0, 16(%edx)
 	jz	L(exit_tail4)
+# ifdef __CHKP__
+	bndcu	20(%edx), %bnd0
+# endif
 	cmp	$0, 20(%edx)
 	jz	L(exit_tail5)
+# ifdef __CHKP__
+	bndcu	24(%edx), %bnd0
+# endif
 	cmp	$0, 24(%edx)
 	jz	L(exit_tail6)
+# ifdef __CHKP__
+	bndcu	28(%edx), %bnd0
+# endif
 	cmp	$0, 28(%edx)
 	jz	L(exit_tail7)
@@ -48,6 +74,9 @@ ENTRY (__wcslen_sse2)
 	lea	16(%edx), %ecx
 	and	$-16, %eax

+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	pcmpeqd	(%eax), %xmm0
 	pmovmskb %xmm0, %edx
 	pxor	%xmm1, %xmm1
@@ -55,6 +84,9 @@ ENTRY (__wcslen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)

+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	pcmpeqd	(%eax), %xmm1
 	pmovmskb %xmm1, %edx
 	pxor	%xmm2, %xmm2
@@ -62,6 +94,9 @@ ENTRY (__wcslen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)

+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	pcmpeqd	(%eax), %xmm2
 	pmovmskb %xmm2, %edx
 	pxor	%xmm3, %xmm3
@@ -69,6 +104,9 @@ ENTRY (__wcslen_sse2)
 	lea	16(%eax), %eax
 	jnz	L(exit)

+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	pcmpeqd	(%eax), %xmm3
 	pmovmskb %xmm3, %edx
 	test	%edx, %edx
@@ -79,6 +117,9 @@ ENTRY (__wcslen_sse2)

 	.p2align 4
 L(aligned_64_loop):
+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	movaps	(%eax), %xmm0
 	movaps	16(%eax), %xmm1
 	movaps	32(%eax), %xmm2
@@ -129,6 +170,10 @@ L(exit):
 	mov	%dl, %cl
 	and	$15, %cl
 	jz	L(exit_1)
+# ifdef __CHKP__
+	mov	STR(%esp), %edx
+	bndcu	-1(%edx, %eax, 4), %bnd0
+# endif
 	ret

 	.p2align 4
@@ -137,16 +182,28 @@ L(exit_high):
 	and	$15, %ch
 	jz	L(exit_3)
 	add	$2, %eax
+# ifdef __CHKP__
+	mov	STR(%esp), %edx
+	bndcu	-1(%edx, %eax, 4), %bnd0
+# endif
 	ret

 	.p2align 4
 L(exit_1):
 	add	$1, %eax
+# ifdef __CHKP__
+	mov	STR(%esp), %edx
+	bndcu	-1(%edx, %eax, 4), %bnd0
+# endif
 	ret

 	.p2align 4
 L(exit_3):
 	add	$3, %eax
+# ifdef __CHKP__
+	mov	STR(%esp), %edx
+	bndcu	-1(%edx, %eax, 4), %bnd0
+# endif
 	ret

 	.p2align 4
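A note on the bndldx addressing form used in all of these prologues: in "bndldx STR(%esp,%edx,1), %bnd0" the base-plus-displacement part names the memory slot the pointer argument was loaded from, while the index register carries the pointer value itself. The hardware walks the bound directory keyed by the slot address and compares the pointer value recorded by the matching bndstx against the index register; if they disagree (for instance, the slot was overwritten by code that is not MPX-aware), it hands back the always-valid bounds instead of faulting. Roughly, with every name in this sketch hypothetical:

    #include <stdint.h>

    struct bnd { uintptr_t lb, ub; };

    /* One bounds-table entry as a bndstx would have written it:
       the bounds plus the pointer value they were registered for.  */
    struct bt_entry { struct bnd b; uintptr_t ptr_value; };

    /* Toy stand-in for the two-level bound directory/table walk.  */
    #define BT_SLOTS 64
    static struct bt_entry bound_table[BT_SLOTS];

    static struct bt_entry *bt_lookup (uintptr_t slot_address)
    {
      return &bound_table[(slot_address / sizeof (void *)) % BT_SLOTS];
    }

    struct bnd bndldx_model (uintptr_t slot_address, uintptr_t ptr_value)
    {
      struct bt_entry *e = bt_lookup (slot_address);
      if (e->ptr_value != ptr_value)
        /* Stale or missing entry: fall back to the always-valid
           bounds [0, UINTPTR_MAX] rather than faulting.  */
        return (struct bnd) { 0, UINTPTR_MAX };
      return e->b;
    }

This fallback is why the checks can be compiled in unconditionally under __CHKP__: a caller that never registered bounds for an argument simply gets the permissive pair.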
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
index d31e48e43f..f7c70e6de8 100644
--- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
@@ -36,12 +36,23 @@
 # define STR1	PARMS
 # define STR2	STR1+4

+# ifdef __CHKP__
+# undef RETURN
+# define RETURN	bndcu (%eax),%bnd0; \
+	POP (%edi); ret; CFI_PUSH (%edi);
+# endif
+
 	atom_text_section
 ENTRY (__wcsrchr_sse2)
 	ENTRANCE
 	mov	STR1(%esp), %ecx
 	movd	STR2(%esp), %xmm1
+# ifdef __CHKP__
+	bndldx	STR1(%esp,%ecx,1), %bnd0
+	bndcl	(%ecx),%bnd0
+	bndcu	(%ecx),%bnd0
+# endif

 	mov	%ecx, %edi
 	punpckldq %xmm1, %xmm1
@@ -137,6 +148,9 @@ L(unaligned_match):
 /* Loop start on aligned string.  */
 	.p2align 4
 L(loop):
+# ifdef __CHKP__
+	bndcu	(%edi),%bnd0
+# endif
 	movdqa	(%edi), %xmm0
 	pcmpeqd	%xmm0, %xmm2
 	add	$16, %edi
@@ -146,6 +160,9 @@ L(loop):
 	or	%eax, %ecx
 	jnz	L(matches)

+# ifdef __CHKP__
+	bndcu	(%edi),%bnd0
+# endif
 	movdqa	(%edi), %xmm3
 	pcmpeqd	%xmm3, %xmm2
 	add	$16, %edi
@@ -155,6 +172,9 @@ L(loop):
 	or	%eax, %ecx
 	jnz	L(matches)

+# ifdef __CHKP__
+	bndcu	(%edi),%bnd0
+# endif
 	movdqa	(%edi), %xmm4
 	pcmpeqd	%xmm4, %xmm2
 	add	$16, %edi
@@ -164,6 +184,9 @@ L(loop):
 	or	%eax, %ecx
 	jnz	L(matches)

+# ifdef __CHKP__
+	bndcu	(%edi),%bnd0
+# endif
 	movdqa	(%edi), %xmm5
 	pcmpeqd	%xmm5, %xmm2
 	add	$16, %edi
diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S
index 6ca6220a02..67134af471 100644
--- a/sysdeps/i386/i686/strcmp.S
+++ b/sysdeps/i386/i686/strcmp.S
@@ -29,8 +29,19 @@ ENTRY (strcmp)
 	movl	STR1(%esp), %ecx
 	movl	STR2(%esp), %edx
-
-L(oop):	movb	(%ecx), %al
+#ifdef __CHKP__
+	bndldx	STR1(%esp,%ecx,1), %bnd0
+	bndldx	STR2(%esp,%edx,1), %bnd1
+	bndcl	(%ecx), %bnd0
+	bndcl	(%edx), %bnd1
+#endif
+
+L(oop):
+#ifdef __CHKP__
+	bndcu	(%ecx), %bnd0
+	bndcu	(%edx), %bnd1
+#endif
+	movb	(%ecx), %al
 	cmpb	(%edx), %al
 	jne	L(neq)
 	incl	%ecx
diff --git a/sysdeps/i386/i686/strtok.S b/sysdeps/i386/i686/strtok.S
index 8848faf4d9..78a2ea9fce 100644
--- a/sysdeps/i386/i686/strtok.S
+++ b/sysdeps/i386/i686/strtok.S
@@ -121,6 +121,14 @@ ENTRY (FUNCTION)
 	testl	%edx, %edx
 	jz	L(returnNULL)
 	movl	DELIM(%esp), %eax	/* Get start of delimiter set.  */
+#ifdef __CHKP__
+	bndldx	STR(%esp,%edx,1),%bnd0
+	bndldx	DELIM(%esp,%eax,1),%bnd1
+	bndcl	(%edx), %bnd0
+	bndcu	(%edx), %bnd0
+	bndcl	(%eax), %bnd1
+	bndcu	(%eax), %bnd1
+#endif

 /* For understanding the following code remember that %ecx == 0 now.
    Although all the following instructions only modify %cl we always
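strcmp is the simplest illustration of where the checks have to sit: the bndcl pair can run once at entry, but neither string's length is known, so the bndcu pair is re-issued inside L(oop) before every byte is dereferenced, because the terminating NUL may legitimately be the last valid byte of either buffer. The shape, sketched in C (chk_lower and chk_upper are the same hypothetical stand-ins for the faulting checks as in the earlier sketch):

    #include <stdint.h>
    #include <stdlib.h>

    struct bnd { uintptr_t lb, ub; };

    static void chk_lower (struct bnd b, const void *p)
    { if ((uintptr_t) p < b.lb) abort (); }

    static void chk_upper (struct bnd b, const void *p)
    { if ((uintptr_t) p > b.ub) abort (); }

    int strcmp_checked (const char *s1, struct bnd b1,
                        const char *s2, struct bnd b2)
    {
      chk_lower (b1, s1);       /* once, at entry: the bndcl pair */
      chk_lower (b2, s2);
      for (;;)
        {
          chk_upper (b1, s1);   /* every iteration: the bndcu pair,
                                   ahead of the loads */
          chk_upper (b2, s2);
          unsigned char c1 = *s1++;
          unsigned char c2 = *s2++;
          if (c1 != c2)
            return c1 < c2 ? -1 : 1;
          if (c1 == '\0')
            return 0;
        }
    }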
diff --git a/sysdeps/i386/memchr.S b/sysdeps/i386/memchr.S
index 67995002ed..39fe616974 100644
--- a/sysdeps/i386/memchr.S
+++ b/sysdeps/i386/memchr.S
@@ -51,6 +51,11 @@ ENTRY (__memchr)
 	movl	LEN(%esp), %esi		/* len: length of memory block.  */
 	cfi_rel_offset (esi, 4)

+#ifdef __CHKP__
+	bndldx	STR(%esp,%eax,1), %bnd0
+	bndcl	(%eax), %bnd0
+#endif
+
 /* If we must not test more than three characters, test
    them one by one.  This is especially true for 0.  */
 	cmpl	$4, %esi
@@ -72,6 +77,9 @@ ENTRY (__memchr)
 	testb	$3, %al			/* correctly aligned ? */
 	je	L(2)			/* yes => begin loop */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)		/* compare byte */
 	je	L(9)			/* target found => return */
 	incl	%eax			/* increment source pointer */
@@ -80,6 +88,9 @@ ENTRY (__memchr)
 	testb	$3, %al			/* correctly aligned ? */
 	je	L(2)			/* yes => begin loop */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)		/* compare byte */
 	je	L(9)			/* target found => return */
 	incl	%eax			/* increment source pointer */
@@ -88,6 +99,9 @@ ENTRY (__memchr)
 	testb	$3, %al			/* correctly aligned ? */
 	je	L(2)			/* yes => begin loop */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)		/* compare byte */
 	je	L(9)			/* target found => return */
 	incl	%eax			/* increment source pointer */
@@ -127,7 +141,11 @@ ENTRY (__memchr)

 	ALIGN (4)

-L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
+	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
@@ -162,6 +180,9 @@ L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   (following LL(13) below).  Even the len can be compared with
 	   constants instead of decrementing each time.  */

+#ifdef __CHKP__
+	bndcu	4(%eax), %bnd0
+#endif
 	movl	4(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -176,6 +197,9 @@ L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(7)			/* found it => return pointer */

+#ifdef __CHKP__
+	bndcu	8(%eax), %bnd0
+#endif
 	movl	8(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -190,6 +214,9 @@ L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(6)			/* found it => return pointer */

+#ifdef __CHKP__
+	bndcu	12(%eax), %bnd0
+#endif
 	movl	12(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -213,6 +240,9 @@ L(2):	subl	$16, %esi
 	cmpl	$4-16, %esi	/* rest < 4 bytes? */
 	jb	L(3)		/* yes, then test byte by byte */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -231,6 +261,9 @@ L(2):	subl	$16, %esi
 	cmpl	$8-16, %esi	/* rest < 8 bytes? */
 	jb	L(3)		/* yes, then test byte by byte */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -249,6 +282,9 @@ L(2):	subl	$16, %esi
 	cmpl	$12-16, %esi	/* rest < 12 bytes? */
 	jb	L(3)		/* yes, then test byte by byte */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -268,18 +304,27 @@ L(2):	subl	$16, %esi

 L(3):	andl	$3, %esi	/* mask out uninteresting bytes */
 	jz	L(4)		/* no remaining bytes => return NULL */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)	/* compare byte with CHR */
 	je	L(9)		/* equal, then return pointer */
 	incl	%eax		/* increment source pointer */
 	decl	%esi		/* decrement length */
 	jz	L(4)		/* no remaining bytes => return NULL */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)	/* compare byte with CHR */
 	je	L(9)		/* equal, then return pointer */
 	incl	%eax		/* increment source pointer */
 	decl	%esi		/* decrement length */
 	jz	L(4)		/* no remaining bytes => return NULL */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)	/* compare byte with CHR */
 	je	L(9)		/* equal, then return pointer */
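The 0xfefefeff constant that memchr keeps reloading (and that rawmemchr, strchrnul and strrchr below reuse) is the classic word-at-a-time zero-byte detector: 0xfefefeff is -0x01010101 modulo 2^32, so adding it subtracts 1 from every byte of the word, and the per-byte borrows expose exactly the bytes that were zero after the XOR with c|c|c|c. The assembly consumes the carries through the flags; the better-known C formulation of the same test is:

    #include <assert.h>
    #include <stdint.h>

    /* Nonzero iff some byte of v is zero.  v + 0xfefefeff equals
       v - 0x01010101 mod 2^32, i.e. it subtracts 1 from every byte;
       only a byte that was zero can both borrow (producing 0xff,
       high bit set) and have had its high bit clear in v, which is
       what the "& ~v" factor enforces.  */
    static int has_zero_byte (uint32_t v)
    {
      return ((v + 0xfefefeffu) & ~v & 0x80808080u) != 0;
    }

    /* The memchr probe: bytes equal to c become zero after the XOR.  */
    static int word_contains (uint32_t word, unsigned char c)
    {
      return has_zero_byte (word ^ (0x01010101u * c));
    }

    int main (void)
    {
      assert (has_zero_byte (0x11220033u));
      assert (!has_zero_byte (0x11223344u));
      assert (word_contains (0x41424344u, 0x43));
      assert (!word_contains (0x41424344u, 0x45));
      return 0;
    }

For the bounds checking this means one bndcu per word rather than per byte; note that the patch checks only the first byte of each aligned 4-byte load, accepting the same aligned over-read the unchecked code already relies on.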
diff --git a/sysdeps/i386/memcmp.S b/sysdeps/i386/memcmp.S
index 21e0bfcd1c..7beab65325 100644
--- a/sysdeps/i386/memcmp.S
+++ b/sysdeps/i386/memcmp.S
@@ -37,6 +37,12 @@ ENTRY (memcmp)
 	cfi_rel_offset (esi, 0)
 	movl	BLK2(%esp), %edi
 	movl	LEN(%esp), %ecx
+#ifdef __CHKP__
+	bndldx	BLK1(%esp,%esi,1), %bnd0
+	bndldx	BLK2(%esp,%edi,1), %bnd1
+	bndcl	(%esi), %bnd0
+	bndcl	(%edi), %bnd1
+#endif

 	cld			/* Set direction of comparison.  */
@@ -59,7 +65,13 @@ ENTRY (memcmp)
 	   Note that the following operation does not change 0xffffffff.  */
 	orb	$1, %al		/* Change 0 to 1.  */

-L(1):	popl	%esi		/* Restore registers.  */
+L(1):
+#ifdef __CHKP__
+	bndcu	(%esi), %bnd0
+	bndcu	(%edi), %bnd1
+#endif
+	popl	%esi		/* Restore registers.  */
+
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (esi)
 	movl	%edx, %edi
diff --git a/sysdeps/i386/rawmemchr.S b/sysdeps/i386/rawmemchr.S
index 2bd20e0459..27441ddf73 100644
--- a/sysdeps/i386/rawmemchr.S
+++ b/sysdeps/i386/rawmemchr.S
@@ -46,6 +46,11 @@ ENTRY (__rawmemchr)
 	movl	STR(%esp), %eax
 	movl	CHR(%esp), %edx

+#ifdef __CHKP__
+	bndldx	STR(%esp,%eax,1), %bnd0
+	bndcl	(%eax), %bnd0
+#endif
+
 	/* At the moment %edx contains C.  What we need for the
	   algorithm is C in all bytes of the dword.  Avoid
	   operations on 16 bit words because these require an
@@ -62,18 +67,27 @@ ENTRY (__rawmemchr)
 	testb	$3, %al			/* correctly aligned ? */
 	je	L(1)			/* yes => begin loop */

+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)		/* compare byte */
 	je	L(9)			/* target found => return */
 	incl	%eax			/* increment source pointer */

 	testb	$3, %al			/* correctly aligned ? */
 	je	L(1)			/* yes => begin loop */
+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)		/* compare byte */
 	je	L(9)			/* target found => return */
 	incl	%eax			/* increment source pointer */

 	testb	$3, %al			/* correctly aligned ? */
 	je	L(1)			/* yes => begin loop */
+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	cmpb	%dl, (%eax)		/* compare byte */
 	je	L(9)			/* target found => return */
 	incl	%eax			/* increment source pointer */
@@ -108,7 +122,11 @@ ENTRY (__rawmemchr)
 	/* Each round the main loop processes 16 bytes.  */
 	ALIGN (4)

-L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
+	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
@@ -143,6 +161,9 @@ L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   (following LL(13) below).  Even the len can be compared with
 	   constants instead of decrementing each time.  */

+#ifdef __CHKP__
+	bndcu	4(%eax), %bnd0
+#endif
 	movl	4(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -157,6 +178,9 @@ L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(7)			/* found it => return pointer */

+#ifdef __CHKP__
+	bndcu	8(%eax), %bnd0
+#endif
 	movl	8(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -171,6 +195,9 @@ L(1):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(6)			/* found it => return pointer */

+#ifdef __CHKP__
+	bndcu	12(%eax), %bnd0
+#endif
 	movl	12(%eax), %ecx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
@@ -211,6 +238,9 @@ L(8):	testb	%cl, %cl	/* test first byte in dword */
 	/* No further test needed, we know it is one of the four bytes.  */

 L(9):
+#ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+#endif
 	popl	%edi		/* pop saved register */
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (edi)
diff --git a/sysdeps/i386/stpncpy.S b/sysdeps/i386/stpncpy.S
index b23e8208a0..22d727a356 100644
--- a/sysdeps/i386/stpncpy.S
+++ b/sysdeps/i386/stpncpy.S
@@ -42,6 +42,14 @@ ENTRY (__stpncpy)
 	movl	SRC(%esp), %esi
 	cfi_rel_offset (esi, 0)
 	movl	LEN(%esp), %ecx
+#ifdef __CHKP__
+	bndldx	DEST(%esp,%eax,1), %bnd0
+	bndldx	SRC(%esp,%esi,1), %bnd1
+	bndcl	(%eax), %bnd0
+	bndcu	-1(%eax, %ecx), %bnd0
+	bndcl	(%esi), %bnd1
+	bndcu	(%esi), %bnd1
+#endif

 	subl	%eax, %esi	/* magic: reduce number of loop variants
				   to one using addressing mode */
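stpncpy is the one routine in this batch whose write extent is fully known on entry, so the destination can be range-checked once, bndcl on the first byte and bndcu on byte LEN-1, instead of ahead of every store; only the source side still needs incremental checks, since how far the copy reads depends on where the NUL sits. As a C sketch, with the hypothetical helpers from before (note the guard for LEN == 0, which the asm's -1(%eax, %ecx) form does not have):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdlib.h>

    struct bnd { uintptr_t lb, ub; };

    static void chk_lower (struct bnd b, const void *p)
    { if ((uintptr_t) p < b.lb) abort (); }

    static void chk_upper (struct bnd b, const void *p)
    { if ((uintptr_t) p > b.ub) abort (); }

    char *stpncpy_checked (char *dst, struct bnd bdst,
                           const char *src, struct bnd bsrc, size_t len)
    {
      if (len == 0)             /* the asm assumes len > 0 here */
        return dst;
      /* The whole destination range [dst, dst + len - 1] is written
         (stpncpy NUL-pads), so one up-front pair suffices.  */
      chk_lower (bdst, dst);
      chk_upper (bdst, dst + len - 1);
      chk_lower (bsrc, src);

      size_t i = 0;
      while (i < len)
        {
          chk_upper (bsrc, src + i);    /* check before each source read */
          char c = src[i];
          if (c == '\0')
            break;
          dst[i++] = c;
        }
      char *end = dst + i;      /* stpncpy returns past the last copied byte */
      for (; i < len; ++i)
        dst[i] = '\0';
      return end;
    }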
diff --git a/sysdeps/i386/strchrnul.S b/sysdeps/i386/strchrnul.S
index 7ceb88ed8b..86bf770aef 100644
--- a/sysdeps/i386/strchrnul.S
+++ b/sysdeps/i386/strchrnul.S
@@ -38,6 +38,11 @@ ENTRY (__strchrnul)
 	movl	STR(%esp), %eax
 	movl	CHR(%esp), %edx

+# ifdef __CHKP__
+	bndldx	STR(%esp,%eax,1), %bnd0
+	bndcl	(%eax), %bnd0
+	bndcu	(%eax), %bnd0
+# endif
 	/* At the moment %edx contains CHR.  What we need for the
	   algorithm is CHR in all bytes of the dword.  Avoid
	   operations on 16 bit words because these require an
@@ -60,6 +65,9 @@ ENTRY (__strchrnul)
 	testb	$3, %al			/* correctly aligned ? */
 	jz	L(11)			/* yes => begin loop */

+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	movb	(%eax), %cl		/* load byte in question (we need it twice) */
 	cmpb	%cl, %dl		/* compare byte */
 	je	L(6)			/* target found => return */
@@ -69,6 +77,9 @@ ENTRY (__strchrnul)
 	testb	$3, %al			/* correctly aligned ? */
 	jz	L(11)			/* yes => begin loop */

+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	movb	(%eax), %cl		/* load byte in question (we need it twice) */
 	cmpb	%cl, %dl		/* compare byte */
 	je	L(6)			/* target found => return */
@@ -78,6 +89,9 @@ ENTRY (__strchrnul)
 	testb	$3, %al			/* correctly aligned ? */
 	jz	L(11)			/* yes => begin loop */

+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
 	movb	(%eax), %cl		/* load byte in question (we need it twice) */
 	cmpb	%cl, %dl		/* compare byte */
 	je	L(6)			/* target found => return */
@@ -120,7 +134,11 @@ ENTRY (__strchrnul)

 L(1):	addl	$16, %eax	/* adjust pointer for whole round */

-L(11):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
+L(11):
+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
+	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
 	movl	$0xfefefeff, %edi	/* magic value */
@@ -164,6 +182,9 @@ L(11):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(7)			/* found NUL => return NULL */

+# ifdef __CHKP__
+	bndcu	4(%eax), %bnd0
+# endif
 	movl	4(%eax), %ecx	/* get word (= 4 bytes) in question */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
@@ -189,6 +210,9 @@ L(11):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(71)			/* found NUL => return NULL */

+# ifdef __CHKP__
+	bndcu	8(%eax), %bnd0
+# endif
 	movl	8(%eax), %ecx	/* get word (= 4 bytes) in question */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
@@ -214,6 +238,9 @@ L(11):	movl	(%eax), %ecx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(72)			/* found NUL => return NULL */

+# ifdef __CHKP__
+	bndcu	12(%eax), %bnd0
+# endif
 	movl	12(%eax), %ecx	/* get word (= 4 bytes) in question */
 	xorl	%edx, %ecx	/* XOR with word c|c|c|c => bytes of str == c
				   are now 0 */
@@ -268,7 +295,11 @@ L(7):	testb	%cl, %cl	/* is first byte CHR? */
 	/* It must be in the fourth byte and it cannot be NUL.  */
 	incl	%eax

-L(6):	popl	%edi		/* restore saved register content */
+L(6):
+# ifdef __CHKP__
+	bndcu	(%eax), %bnd0
+# endif
+	popl	%edi		/* restore saved register content */
 	cfi_adjust_cfa_offset (-4)
 	cfi_restore (edi)
diff --git a/sysdeps/i386/strcspn.S b/sysdeps/i386/strcspn.S
index 0c262d6001..1352b038f0 100644
--- a/sysdeps/i386/strcspn.S
+++ b/sysdeps/i386/strcspn.S
@@ -32,6 +32,14 @@ ENTRY (strcspn)
 	movl	STR(%esp), %edx
 	movl	STOP(%esp), %eax
+#ifdef __CHKP__
+	bndldx	STR(%esp,%edx,1), %bnd0
+	bndldx	STOP(%esp,%eax,1), %bnd1
+	bndcl	(%edx), %bnd0
+	bndcl	(%eax), %bnd1
+	bndcu	(%edx), %bnd0
+	bndcu	(%eax), %bnd1
+#endif

 /* First we create a table with flags for all possible characters.
    For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strpbrk.S b/sysdeps/i386/strpbrk.S
index 246ae27c53..7190a064ac 100644
--- a/sysdeps/i386/strpbrk.S
+++ b/sysdeps/i386/strpbrk.S
@@ -33,6 +33,14 @@ ENTRY (strpbrk)
 	movl	STR(%esp), %edx
 	movl	STOP(%esp), %eax
+#ifdef __CHKP__
+	bndldx	STR(%esp,%edx,1), %bnd0
+	bndldx	STOP(%esp,%eax,1), %bnd1
+	bndcl	(%edx), %bnd0
+	bndcl	(%eax), %bnd1
+	bndcu	(%edx), %bnd0
+	bndcu	(%eax), %bnd1
+#endif

 /* First we create a table with flags for all possible characters.
    For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strrchr.S b/sysdeps/i386/strrchr.S
index 31b8a4562c..858bba463c 100644
--- a/sysdeps/i386/strrchr.S
+++ b/sysdeps/i386/strrchr.S
@@ -40,6 +40,10 @@ ENTRY (strrchr)
 	movl	STR(%esp), %esi
 	cfi_rel_offset (esi, 0)
 	movl	CHR(%esp), %ecx
+#ifdef __CHKP__
+	bndldx	STR(%esp,%esi,1), %bnd0
+	bndcl	(%esi), %bnd0
+#endif

 	/* At the moment %ecx contains C.  What we need for the
	   algorithm is C in all bytes of the dword.  Avoid
@@ -63,6 +67,9 @@ ENTRY (strrchr)
 	testl	$3, %esi		/* correctly aligned ? */
 	jz	L(19)			/* yes => begin loop */

+#ifdef __CHKP__
+	bndcu	(%esi), %bnd0
+#endif
 	movb	(%esi), %dl		/* load byte in question (we need it twice) */
 	cmpb	%dl, %cl		/* compare byte */
 	jne	L(11)			/* target found => return */
@@ -73,6 +80,9 @@ L(11):	orb	%dl, %dl		/* is NUL? */
 	testl	$3, %esi		/* correctly aligned ? */
 	jz	L(19)			/* yes => begin loop */

+#ifdef __CHKP__
+	bndcu	(%esi), %bnd0
+#endif
 	movb	(%esi), %dl		/* load byte in question (we need it twice) */
 	cmpb	%dl, %cl		/* compare byte */
 	jne	L(12)			/* target found => return */
@@ -83,6 +93,9 @@ L(12):	orb	%dl, %dl		/* is NUL? */
 	testl	$3, %esi		/* correctly aligned ? */
 	jz	L(19)			/* yes => begin loop */

+#ifdef __CHKP__
+	bndcu	(%esi), %bnd0
+#endif
 	movb	(%esi), %dl		/* load byte in question (we need it twice) */
 	cmpb	%dl, %cl		/* compare byte */
 	jne	L(13)			/* target found => return */
@@ -170,7 +183,11 @@ L(51):

 L(1):	addl	$16, %esi		/* increment pointer for full round */

-L(19):	movl	(%esi), %edx	/* get word (= 4 bytes) in question */
+L(19):
+#ifdef __CHKP__
+	bndcu	(%esi), %bnd0
+#endif
+	movl	(%esi), %edx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	addl	%edx, %edi	/* add the magic value to the word.  We get
				   carry bits reported for each byte which
@@ -214,6 +231,9 @@ L(19):	movl	(%esi), %edx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(3)			/* C is detected in the word => examine it */

+#ifdef __CHKP__
+	bndcu	4(%esi), %bnd0
+#endif
 	movl	4(%esi), %edx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	addl	%edx, %edi	/* add the magic value to the word.  We get
@@ -238,6 +258,9 @@ L(19):	movl	(%esi), %edx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(31)			/* C is detected in the word => examine it */

+#ifdef __CHKP__
+	bndcu	8(%esi), %bnd0
+#endif
 	movl	8(%esi), %edx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	addl	%edx, %edi	/* add the magic value to the word.  We get
@@ -262,6 +285,9 @@ L(19):	movl	(%esi), %edx	/* get word (= 4 bytes) in question */
 	   the addition will not result in 0.  */
 	jnz	L(32)			/* C is detected in the word => examine it */

+#ifdef __CHKP__
+	bndcu	12(%esi), %bnd0
+#endif
 	movl	12(%esi), %edx	/* get word (= 4 bytes) in question */
 	movl	$0xfefefeff, %edi	/* magic value */
 	addl	%edx, %edi	/* add the magic value to the word.  We get
diff --git a/sysdeps/i386/strtok.S b/sysdeps/i386/strtok.S
index 79d540b603..cfee507da2 100644
--- a/sysdeps/i386/strtok.S
+++ b/sysdeps/i386/strtok.S
@@ -67,6 +67,11 @@ ENTRY (FUNCTION)
 	movl	STR(%esp), %edx
 	movl	DELIM(%esp), %eax

+#ifdef __CHKP__
+	bndldx	STR(%esp,%edx,1), %bnd0
+	bndldx	DELIM(%esp,%eax,1), %bnd1
+#endif
+
 #if !defined USE_AS_STRTOK_R && defined PIC
 	pushl	%ebx			/* Save PIC register.  */
 	cfi_adjust_cfa_offset (4)
@@ -336,6 +341,9 @@ L(11):
 	/* Store the pointer to the next character.  */
 #ifdef USE_AS_STRTOK_R
 	movl	SAVE(%esp), %ecx
+# ifdef __CHKP__
+	bndmov	%bnd2, %bnd0
+# endif
 #endif
 	movl	%edx, SAVE_PTR
@@ -351,6 +359,9 @@ L(returnNULL):
 	xorl	%eax, %eax
 #ifdef USE_AS_STRTOK_R
 	movl	SAVE(%esp), %ecx
+# ifdef __CHKP__
+	bndmov	%bnd2, %bnd0
+# endif
 #endif
 	movl	%edx, SAVE_PTR
 	jmp	L(epilogue)
--
cgit v1.2.1