diff options
author | Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> | 2017-07-03 10:46:13 +0530 |
---|---|---|
committer | Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> | 2017-07-03 10:46:13 +0530 |
commit | 2572f356b18ddee03b331ba33f5a2ae65d031a59 (patch) | |
tree | b3b0ee3b360fe7f115319e3b3a38bc30b9c98c38 /sysdeps/powerpc/powerpc64 | |
parent | 8dc6133eff1c09382bc8f6f98c49fb807900eecd (diff) | |
download | glibc-2572f356b18ddee03b331ba33f5a2ae65d031a59.tar.gz |
powerpc: Clean up strlen and strnlen for power8
To align a quadword-aligned address to 64 bytes, a maximum of three
16-byte loads is needed in the worst case, rather than four.
Diffstat (limited to 'sysdeps/powerpc/powerpc64')
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strlen.S | 11 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strnlen.S | 10 |
2 files changed, 1 insertion, 20 deletions
diff --git a/sysdeps/powerpc/powerpc64/power8/strlen.S b/sysdeps/powerpc/powerpc64/power8/strlen.S index 8fdb6f5cc1..5691d1d93a 100644 --- a/sysdeps/powerpc/powerpc64/power8/strlen.S +++ b/sysdeps/powerpc/powerpc64/power8/strlen.S @@ -144,17 +144,6 @@ L(align64): or r5,r10,r11 cmpdi cr7,r5,0 addi r9,r9,16 - bne cr7,L(dword_zero) - - andi. r10,r9,63 - beq cr0,L(preloop) - ld r6,8(r4) - ldu r5,16(r4) - cmpb r10,r6,r0 - cmpb r11,r5,r0 - or r5,r10,r11 - cmpdi cr7,r5,0 - addi r9,r9,16 /* At this point, we are necessarily 64-byte aligned. If no zeroes were found, jump to the vectorized loop. */ diff --git a/sysdeps/powerpc/powerpc64/power8/strnlen.S b/sysdeps/powerpc/powerpc64/power8/strnlen.S index 07608ffa26..6d669d4a54 100644 --- a/sysdeps/powerpc/powerpc64/power8/strnlen.S +++ b/sysdeps/powerpc/powerpc64/power8/strnlen.S @@ -141,15 +141,7 @@ ENTRY_TOCLESS (__strnlen) addi r4,r4,-16 /* Decrement maxlen in 16 bytes. */ bne cr6,L(found_aligning64B) /* If found null bytes. */ - /* Unroll 3x above code block until aligned or find null bytes. */ - andi. r7,r5,63 - beq cr0,L(preloop_64B) - lvx v1,r5,r6 - vcmpequb. v1,v1,v0 - addi r5,r5,16 - addi r4,r4,-16 - bne cr6,L(found_aligning64B) - + /* Unroll 2x above code block until aligned or find null bytes. */ andi. r7,r5,63 beq cr0,L(preloop_64B) lvx v1,r5,r6 |