From bb6dd77f4a97c4b0b10b47fa92031079b32fc37f Mon Sep 17 00:00:00 2001 From: tege Date: Mon, 17 Apr 2000 15:01:59 +0200 Subject: Optimizations from Robert Harley. --- mpn/arm/add_n.S | 29 ++++++++-------- mpn/arm/addmul_1.S | 99 +++++++++++++++++++++++++----------------------------- mpn/arm/mul_1.S | 85 +++++++++++++++++++++++----------------------- mpn/arm/sub_n.S | 27 ++++++++------- 4 files changed, 116 insertions(+), 124 deletions(-) diff --git a/mpn/arm/add_n.S b/mpn/arm/add_n.S index a6bcbcbb1..10a88b657 100644 --- a/mpn/arm/add_n.S +++ b/mpn/arm/add_n.S @@ -20,13 +20,13 @@ @ along with the GNU MP Library; see the file COPYING.LIB. If not, write to @ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @ MA 02111-1307, USA. - + #define s r0 #define a r1 #define b r2 #define n r3 -#define sl r10 /* Do not use, reserved for PIC. */ +#define sl r10 #define fp r11 #define ip r12 #define sp r13 @@ -35,42 +35,43 @@ .text .align 0 - .global __gmpn_add_n + .global __gmpn_add_n .type __gmpn_add_n,%function __gmpn_add_n: stmfd sp!, { r8, r9, lr } movs n, n, lsr #1 - bcc Lskip1 + bcc skip1 ldr ip, [a], #4 ldr lr, [b], #4 adds ip, ip, lr str ip, [s], #4 -Lskip1: +skip1: tst n, #1 - beq Lskip2 + beq skip2 ldmia a!, { r8, r9 } ldmia b!, { ip, lr } adcs r8, r8, ip adcs r9, r9, lr stmia s!, { r8, r9 } -Lskip2: +skip2: bics n, n, #1 - beq Lreturn + beq return stmfd sp!, { r4, r5, r6, r7 } -LLoop: +add_n_loop: ldmia a!, { r4, r5, r6, r7 } ldmia b!, { r8, r9, ip, lr } adcs r4, r4, r8 + ldr r8, [s] /* Bring stuff into cache. */ adcs r5, r5, r9 adcs r6, r6, ip adcs r7, r7, lr stmia s!, { r4, r5, r6, r7 } sub n, n, #2 teq n, #0 - bne LLoop + bne add_n_loop ldmfd sp!, { r4, r5, r6, r7 } -Lreturn: +return: adc r0, n, #0 - ldmfd sp!, { r8, r9, pc }^ -Lend: - .size __gmpn_add_n, Lend - __gmpn_add_n + ldmfd sp!, { r8, r9, pc } +end: + .size __gmpn_add_n, end - __gmpn_add_n diff --git a/mpn/arm/addmul_1.S b/mpn/arm/addmul_1.S index e9b878eaa..f4316df33 100644 --- a/mpn/arm/addmul_1.S +++ b/mpn/arm/addmul_1.S @@ -20,14 +20,14 @@ @ along with the GNU MP Library; see the file COPYING.LIB. If not, write to @ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @ MA 02111-1307, USA. - + #define p r0 #define a r1 #define n r2 #define w r3 -#define sl r10 /* Do not use, reserved for PIC. */ -#define fp r11 +#define z r11 + #define ip r12 #define sp r13 #define lr r14 @@ -38,61 +38,52 @@ .global __gmpn_addmul_1 .type __gmpn_addmul_1,%function __gmpn_addmul_1: - stmfd sp!, { r7, r8, r9, lr } - mov lr, #0 + stmfd sp!, { r8-r11, lr } + mov z, #0 + mov ip, #0 movs n, n, lsr #1 - bcc Lskip1 - ldmia a!, { lr } - umull ip, lr, w, lr - ldmia p, { r9 } - adds r9, r9, ip - adc lr, lr, #0 - stmia p!, { r9 } -Lskip1: + bcc skip1 + ldr lr, [a], #4 + ldr r9, [p] + umlal r9, ip, w, lr + str r9, [p], #4 +skip1: movs n, n, lsr #1 - bcc Lskip2 - mov r7, lr + bcc skip2 + ldmia p, { r9, r10 } + adds r8, ip, r9 + adc r9, z, #0 ldmia a!, { ip, lr } - umull r8, r9, w, ip - umull ip, lr, w, lr - adds r7, r7, r8 - adcs r8, r9, ip - adc lr, lr, #0 - ldmia p, { r9, ip } - adds r7, r7, r9 - adcs r8, r8, ip - adc lr, lr, #0 - stmia p!, { r7, r8 } -Lskip2: + umlal r8, r9, w, ip + adds r9, r9, r10 + adc ip, z, #0 + umlal r9, ip, w, lr + stmia p!, { r8, r9 } +skip2: teq n, #0 - beq Lreturn - stmfd sp!, { r5, r6 } -LLoop: - mov r5, lr - ldmia a!, { r8, r9, ip, lr } - umull r6, r7, w, r8 - umull r8, r9, w, r9 + beq return + stmfd sp!, { r4-r7 } +addmul_loop: + ldmia p, { r5, r6, r7, r8 } + adds r4, ip, r5 + adc r5, z, #0 + ldmia a!, { r9, r10, ip, lr } + umlal r4, r5, w, r9 adds r5, r5, r6 - adcs r6, r7, r8 - adc r7, r9, #0 - umull r8, r9, w, ip - umull ip, lr, w, lr + adc r6, z, #0 + umlal r5, r6, w, r10 + adds r6, r6, r7 + adc r7, z, #0 + umlal r6, r7, w, ip adds r7, r7, r8 - adcs r8, r9, ip - adc lr, lr, #0 - ldmia p!, { r9, ip } - adds r5, r5, r9 - adcs r6, r6, ip - ldmia p!, { r9, ip } - adcs r7, r7, r9 - adcs r8, r8, ip - adc lr, lr, #0 - stmdb p, { r5, r6, r7, r8 } + adc ip, z, #0 + umlal r7, ip, w, lr subs n, n, #1 - bne LLoop - ldmfd sp!, { r5, r6 } -Lreturn: - mov r0, lr - ldmfd sp!, { r7, r8, r9, pc }^ -Lend: - .size __gmpn_addmul_1, Lend - __gmpn_addmul_1 + stmia p!, { r4, r5, r6, r7 } + bne addmul_loop + ldmfd sp!, { r4-r7 } +return: + mov r0, ip + ldmfd sp!, { r8-r11, pc } +end: + .size __gmpn_addmul_1, end - __gmpn_addmul_1 diff --git a/mpn/arm/mul_1.S b/mpn/arm/mul_1.S index 4ef4d8393..06f145267 100644 --- a/mpn/arm/mul_1.S +++ b/mpn/arm/mul_1.S @@ -26,7 +26,7 @@ #define n r2 #define w r3 -#define sl r10 /* Do not use, reserved for PIC. */ +#define sl r10 #define fp r11 #define ip r12 #define sp r13 @@ -38,45 +38,44 @@ .global __gmpn_mul_1 .type __gmpn_mul_1,%function __gmpn_mul_1: - stmfd sp!, { r7, r8, r9, lr } - mov lr, #0 - movs n, n, lsr #1 - bcc Lskip1 - ldmia a!, { lr } - umull ip, lr, w, lr - stmia p!, { ip } -Lskip1: - movs n, n, lsr #1 - bcc Lskip2 - ldmia a!, { r9, ip } - umull r7, r8, w, r9 - umull r9, ip, w, ip - adds r7, r7, lr - adcs r8, r8, r9 - adc lr, ip, #0 - stmia p!, { r7, r8 } -Lskip2: - teq n, #0 - beq Lreturn - stmfd sp!, { r5, r6 } -LLoop: - ldmia a!, { r7, r8, r9, ip } - umull r5, r6, w, r7 - umull r7, r8, w, r8 - adds r5, r5, lr - adcs r6, r6, r7 - adc r7, r8, #0 - umull lr, r8, w, r9 - umull r9, ip, w, ip - adds r7, r7, lr - adcs r8, r8, r9 - adc lr, ip, #0 - stmia p!, { r5, r6, r7, r8 } - subs n, n, #1 - bne LLoop - ldmfd sp!, { r5, r6 } -Lreturn: - mov r0, lr - ldmfd sp!, { r7, r8, r9, pc }^ -Lend: - .size __gmpn_mul_1, Lend - __gmpn_mul_1 + stmfd sp!, { r8, r9, lr } + ands ip, n, #1 + beq skip1 + ldr lr, [a], #4 + umull r9, ip, w, lr + str r9, [p], #4 +skip1: + tst n, #2 + beq skip2 + mov r8, ip + ldmia a!, { ip, lr } + mov r9, #0 + umlal r8, r9, w, ip + mov ip, #0 + umlal r9, ip, w, lr + stmia p!, { r8, r9 } +skip2: + bics n, n, #3 + beq return + stmfd sp!, { r6, r7 } +mul_1_loop: + mov r6, ip + ldmia a!, { r8, r9, ip, lr } + ldr r7, [p] /* Bring stuff into cache. */ + mov r7, #0 + umlal r6, r7, w, r8 + mov r8, #0 + umlal r7, r8, w, r9 + mov r9, #0 + umlal r8, r9, w, ip + mov ip, #0 + umlal r9, ip, w, lr + subs n, n, #4 + stmia p!, { r6, r7, r8, r9 } + bne mul_1_loop + ldmfd sp!, { r6, r7 } +return: + mov r0, ip + ldmfd sp!, { r8, r9, pc } +end: + .size __gmpn_mul_1, end - __gmpn_mul_1 diff --git a/mpn/arm/sub_n.S b/mpn/arm/sub_n.S index 32a0dbc31..125a3cdf1 100644 --- a/mpn/arm/sub_n.S +++ b/mpn/arm/sub_n.S @@ -20,13 +20,13 @@ @ along with the GNU MP Library; see the file COPYING.LIB. If not, write to @ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, @ MA 02111-1307, USA. - + #define d r0 #define a r1 #define b r2 #define n r3 -#define sl r10 /* Do not use, reserved for PIC. */ +#define sl r10 #define fp r11 #define ip r12 #define sp r13 @@ -41,38 +41,39 @@ __gmpn_sub_n: stmfd sp!, { r8, r9, lr } subs ip, ip, ip tst n, #1 - beq Lskip1 + beq skip1 ldr ip, [a], #4 ldr lr, [b], #4 subs ip, ip, lr str ip, [d], #4 -Lskip1: +skip1: tst n, #2 - beq Lskip2 + beq skip2 ldmia a!, { r8, r9 } ldmia b!, { ip, lr } sbcs r8, r8, ip sbcs r9, r9, lr stmia d!, { r8, r9 } -Lskip2: +skip2: bics n, n, #3 - beq Lreturn + beq return stmfd sp!, { r4, r5, r6, r7 } -LLoop: +sub_n_loop: ldmia a!, { r4, r5, r6, r7 } ldmia b!, { r8, r9, ip, lr } sbcs r4, r4, r8 + ldr r8, [d] /* Bring stuff into cache. */ sbcs r5, r5, r9 sbcs r6, r6, ip sbcs r7, r7, lr stmia d!, { r4, r5, r6, r7 } sub n, n, #4 teq n, #0 - bne LLoop + bne sub_n_loop ldmfd sp!, { r4, r5, r6, r7 } -Lreturn: +return: sbc r0, r0, r0 and r0, r0, #1 - ldmfd sp!, { r8, r9, pc }^ -Lend: - .size __gmpn_sub_n, Lend - __gmpn_sub_n + ldmfd sp!, { r8, r9, pc } +end: + .size __gmpn_sub_n, end - __gmpn_sub_n -- cgit v1.2.1