summary refs log tree commit diff
path: root/sysdeps/i386/i686/multiarch/strcat-sse2.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686/multiarch/strcat-sse2.S')
-rw-r--r-- sysdeps/i386/i686/multiarch/strcat-sse2.S 186
1 files changed, 186 insertions, 0 deletions
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
index 62d60cdb78..b1d39ae5cb 100644
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strcat-sse2.S
@@ -95,10 +95,20 @@ ENTRY (STRCAT)
test %ebx, %ebx
jz L(ExitZero)
# endif
+# ifdef __CHKP__
+ bndldx STR1(%esp,%eax,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+# endif
cmpb $0, (%esi)
mov %esi, %ecx
mov %eax, %edx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
and $63, %ecx
and $63, %edx
@@ -113,6 +123,9 @@ ENTRY (STRCAT)
movdqu (%eax), %xmm1
movdqu (%esi), %xmm5
pcmpeqb %xmm1, %xmm0
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %ecx
pcmpeqb %xmm5, %xmm4
@@ -132,6 +145,9 @@ L(alignment_prolog):
and $-16, %eax
pcmpeqb (%eax), %xmm0
movdqu (%esi), %xmm5
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %edx
pcmpeqb %xmm5, %xmm4
@@ -148,21 +164,33 @@ L(loop_prolog):
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -212,6 +240,9 @@ L(StartStrcpyPart):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm5, (%eax)
pmovmskb %xmm7, %edx
# ifdef USE_AS_STRNCAT
@@ -250,21 +281,33 @@ L(StrlenCore7_1):
.p2align 4
L(align16_loop_1):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16_1)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32_1)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48_1)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -323,6 +366,9 @@ L(StartStrcpyPart_1):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STRNCAT
@@ -341,6 +387,9 @@ L(Unalign16Both):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -352,6 +401,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
L(Unalign16BothBigN):
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -364,6 +417,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%eax, %ecx)
pcmpeqb %xmm4, %xmm0
@@ -376,6 +433,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%eax, %ecx)
pcmpeqb %xmm1, %xmm0
@@ -388,6 +449,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
@@ -400,6 +465,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -412,6 +481,9 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm3, (%eax, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
@@ -421,6 +493,9 @@ L(Unalign16BothBigN):
# ifdef USE_AS_STRNCAT
lea 128(%ebx, %edx), %ebx
# endif
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
@@ -443,6 +518,10 @@ L(Unalign16BothBigN):
L(Unaligned64Loop_start):
add $64, %eax
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu -1(%eax), %bnd0
+# endif
movdqu %xmm4, -64(%eax)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
@@ -485,11 +564,18 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
movdqu %xmm6, 32(%eax)
add $48, %esi
add $48, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
@@ -501,12 +587,18 @@ L(BigN):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm1, (%eax)
sub %ecx, %eax
sub $48, %ebx
@@ -515,6 +607,9 @@ L(BigN):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -532,12 +627,20 @@ L(CopyFrom1To16Bytes):
add %ecx, %eax
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesTail):
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -546,6 +649,10 @@ L(CopyFrom1To32Bytes1):
add $16, %eax
L(CopyFrom1To16BytesTail1):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -554,34 +661,60 @@ L(CopyFrom1To32Bytes):
add %ecx, %esi
add $16, %edx
sub %ecx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %esi
add $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
add $32, %esi
add $32, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
.p2align 4
L(CopyFrom1To16BytesExit):
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
/* Case2 */
@@ -594,6 +727,10 @@ L(CopyFrom1To16BytesCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -605,6 +742,10 @@ L(CopyFrom1To32BytesCase2):
sub %ecx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTailCase2):
@@ -613,12 +754,20 @@ L(CopyFrom1To16BytesTailCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTail1Case2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Case2 or Case3, Case3 */
@@ -631,6 +780,10 @@ L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %ecx, %eax
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -639,6 +792,10 @@ L(CopyFrom1To32BytesCase2OrCase3):
jnz L(CopyFrom1To32BytesCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -647,6 +804,10 @@ L(CopyFrom1To16BytesTailCase2OrCase3):
jnz L(CopyFrom1To16BytesTailCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -657,6 +818,10 @@ L(CopyFrom1To32Bytes1Case2OrCase3):
L(CopyFrom1To16BytesTail1Case2OrCase3):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1Case2)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
# endif
@@ -1110,15 +1275,27 @@ L(Unaligned64LeaveCase3):
and $-16, %ecx
add $48, %ebx
jl L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 63(%eax), %bnd0
+# endif
movdqu %xmm7, 48(%eax)
xor %bh, %bh
movb %bh, 64(%eax)
@@ -1137,6 +1314,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %ecx
sub $16, %ebx
@@ -1146,6 +1326,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
add $16, %ecx
sub $16, %ebx
@@ -1155,6 +1338,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
lea 16(%eax, %ecx), %eax
lea 16(%esi, %ecx), %esi