diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strcmp.S | 30 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power9/strcmp.S | 30 |
3 files changed, 22 insertions, 44 deletions
@@ -1,3 +1,9 @@ +2017-02-07 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> + + * sysdeps/powerpc/powerpc64/power8/strcmp.S: Adjust address for + unaligned load for shorter strings. + * sysdeps/powerpc/powerpc64/power9/strcmp.S: Likewise. + 2017-02-06 Joseph Myers <joseph@codesourcery.com> * math/libm-test-driver.c (flag_test_errno): New variable. diff --git a/sysdeps/powerpc/powerpc64/power8/strcmp.S b/sysdeps/powerpc/powerpc64/power8/strcmp.S index c34ff4a23b..d46bff80cd 100644 --- a/sysdeps/powerpc/powerpc64/power8/strcmp.S +++ b/sysdeps/powerpc/powerpc64/power8/strcmp.S @@ -30,21 +30,21 @@ EALIGN (strcmp, 4, 0) li r0,0 - /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using the code: (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) - with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */ + with PAGE_SIZE being 4096 and ITER_SIZE being 16. */ rldicl r7,r3,0,52 rldicl r9,r4,0,52 - cmpldi cr7,r7,4096-32 + cmpldi cr7,r7,4096-16 bgt cr7,L(pagecross_check) - cmpldi cr5,r9,4096-32 + cmpldi cr5,r9,4096-16 bgt cr5,L(pagecross_check) - /* For short string up to 32 bytes, load both s1 and s2 using + /* For short strings up to 16 bytes, load both s1 and s2 using unaligned dwords and compare. */ ld r8,0(r3) ld r10,0(r4) @@ -60,25 +60,11 @@ EALIGN (strcmp, 4, 0) orc. r9,r12,r11 bne cr0,L(different_nocmpb) - ld r8,16(r3) - ld r10,16(r4) - cmpb r12,r8,r0 - cmpb r11,r8,r10 - orc. r9,r12,r11 - bne cr0,L(different_nocmpb) - - ld r8,24(r3) - ld r10,24(r4) - cmpb r12,r8,r0 - cmpb r11,r8,r10 - orc. r9,r12,r11 - bne cr0,L(different_nocmpb) - - addi r7,r3,32 - addi r4,r4,32 + addi r7,r3,16 + addi r4,r4,16 L(align_8b): - /* Now it has checked for first 32 bytes, align source1 to doubleword + /* Now it has checked for first 16 bytes, align source1 to doubleword and adjust source2 address. 
*/ rldicl r9,r7,0,61 /* source1 alignment to doubleword */ subf r4,r9,r4 /* Adjust source2 address based on source1 diff --git a/sysdeps/powerpc/powerpc64/power9/strcmp.S b/sysdeps/powerpc/powerpc64/power9/strcmp.S index 3e32396c94..17ec8c24c3 100644 --- a/sysdeps/powerpc/powerpc64/power9/strcmp.S +++ b/sysdeps/powerpc/powerpc64/power9/strcmp.S @@ -65,21 +65,21 @@ EALIGN (strcmp, 4, 0) li r0, 0 - /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using + /* Check if [s1]+16 or [s2]+16 will cross a 4K page boundary using the code: (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE)) - with PAGE_SIZE being 4096 and ITER_SIZE begin 32. */ + with PAGE_SIZE being 4096 and ITER_SIZE being 16. */ rldicl r7, r3, 0, 52 rldicl r9, r4, 0, 52 - cmpldi cr7, r7, 4096-32 + cmpldi cr7, r7, 4096-16 bgt cr7, L(pagecross_check) - cmpldi cr5, r9, 4096-32 + cmpldi cr5, r9, 4096-16 bgt cr5, L(pagecross_check) - /* For short strings up to 32 bytes, load both s1 and s2 using + /* For short strings up to 16 bytes, load both s1 and s2 using unaligned dwords and compare. */ ld r8, 0(r3) ld r10, 0(r4) @@ -95,25 +95,11 @@ EALIGN (strcmp, 4, 0) orc. r9, r12, r11 bne cr0, L(different_nocmpb) - ld r8, 16(r3) - ld r10, 16(r4) - cmpb r12, r8, r0 - cmpb r11, r8, r10 - orc. r9, r12, r11 - bne cr0, L(different_nocmpb) - - ld r8, 24(r3) - ld r10, 24(r4) - cmpb r12, r8, r0 - cmpb r11, r8, r10 - orc. r9, r12, r11 - bne cr0, L(different_nocmpb) - - addi r7, r3, 32 - addi r4, r4, 32 + addi r7, r3, 16 + addi r4, r4, 16 L(align): - /* Now it has checked for first 32 bytes. */ + /* The first 16 bytes have now been checked. */ vspltisb v0, 0 vspltisb v2, -1 lvsr v6, 0, r4 /* Compute mask. */ |