summary refs log tree commit diff
diff options
context:
space:
mode:
author tege <tege@gmplib.org> 2000-04-17 15:01:59 +0200
committer tege <tege@gmplib.org> 2000-04-17 15:01:59 +0200
commit bb6dd77f4a97c4b0b10b47fa92031079b32fc37f (patch)
tree 43cccba51acc1d951aa20fe0d274898177f78ca2
parent 0e64aaf90b084b20f76a286ef2d4e11215356b2b (diff)
download gmp-bb6dd77f4a97c4b0b10b47fa92031079b32fc37f.tar.gz
Optimizations from Robert Harley.
-rw-r--r-- mpn/arm/add_n.S 29
-rw-r--r-- mpn/arm/addmul_1.S 99
-rw-r--r-- mpn/arm/mul_1.S 85
-rw-r--r-- mpn/arm/sub_n.S 27
4 files changed, 116 insertions, 124 deletions
diff --git a/mpn/arm/add_n.S b/mpn/arm/add_n.S
index a6bcbcbb1..10a88b657 100644
--- a/mpn/arm/add_n.S
+++ b/mpn/arm/add_n.S
@@ -20,13 +20,13 @@
@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@ MA 02111-1307, USA.
-
+
#define s r0
#define a r1
#define b r2
#define n r3
-#define sl r10 /* Do not use, reserved for PIC. */
+#define sl r10
#define fp r11
#define ip r12
#define sp r13
@@ -35,42 +35,43 @@
.text
.align 0
- .global __gmpn_add_n
+ .global __gmpn_add_n
.type __gmpn_add_n,%function
__gmpn_add_n:
stmfd sp!, { r8, r9, lr }
movs n, n, lsr #1
- bcc Lskip1
+ bcc skip1
ldr ip, [a], #4
ldr lr, [b], #4
adds ip, ip, lr
str ip, [s], #4
-Lskip1:
+skip1:
tst n, #1
- beq Lskip2
+ beq skip2
ldmia a!, { r8, r9 }
ldmia b!, { ip, lr }
adcs r8, r8, ip
adcs r9, r9, lr
stmia s!, { r8, r9 }
-Lskip2:
+skip2:
bics n, n, #1
- beq Lreturn
+ beq return
stmfd sp!, { r4, r5, r6, r7 }
-LLoop:
+add_n_loop:
ldmia a!, { r4, r5, r6, r7 }
ldmia b!, { r8, r9, ip, lr }
adcs r4, r4, r8
+ ldr r8, [s] /* Bring stuff into cache. */
adcs r5, r5, r9
adcs r6, r6, ip
adcs r7, r7, lr
stmia s!, { r4, r5, r6, r7 }
sub n, n, #2
teq n, #0
- bne LLoop
+ bne add_n_loop
ldmfd sp!, { r4, r5, r6, r7 }
-Lreturn:
+return:
adc r0, n, #0
- ldmfd sp!, { r8, r9, pc }^
-Lend:
- .size __gmpn_add_n, Lend - __gmpn_add_n
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_add_n, end - __gmpn_add_n
diff --git a/mpn/arm/addmul_1.S b/mpn/arm/addmul_1.S
index e9b878eaa..f4316df33 100644
--- a/mpn/arm/addmul_1.S
+++ b/mpn/arm/addmul_1.S
@@ -20,14 +20,14 @@
@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@ MA 02111-1307, USA.
-
+
#define p r0
#define a r1
#define n r2
#define w r3
-#define sl r10 /* Do not use, reserved for PIC. */
-#define fp r11
+#define z r11
+
#define ip r12
#define sp r13
#define lr r14
@@ -38,61 +38,52 @@
.global __gmpn_addmul_1
.type __gmpn_addmul_1,%function
__gmpn_addmul_1:
- stmfd sp!, { r7, r8, r9, lr }
- mov lr, #0
+ stmfd sp!, { r8-r11, lr }
+ mov z, #0
+ mov ip, #0
movs n, n, lsr #1
- bcc Lskip1
- ldmia a!, { lr }
- umull ip, lr, w, lr
- ldmia p, { r9 }
- adds r9, r9, ip
- adc lr, lr, #0
- stmia p!, { r9 }
-Lskip1:
+ bcc skip1
+ ldr lr, [a], #4
+ ldr r9, [p]
+ umlal r9, ip, w, lr
+ str r9, [p], #4
+skip1:
movs n, n, lsr #1
- bcc Lskip2
- mov r7, lr
+ bcc skip2
+ ldmia p, { r9, r10 }
+ adds r8, ip, r9
+ adc r9, z, #0
ldmia a!, { ip, lr }
- umull r8, r9, w, ip
- umull ip, lr, w, lr
- adds r7, r7, r8
- adcs r8, r9, ip
- adc lr, lr, #0
- ldmia p, { r9, ip }
- adds r7, r7, r9
- adcs r8, r8, ip
- adc lr, lr, #0
- stmia p!, { r7, r8 }
-Lskip2:
+ umlal r8, r9, w, ip
+ adds r9, r9, r10
+ adc ip, z, #0
+ umlal r9, ip, w, lr
+ stmia p!, { r8, r9 }
+skip2:
teq n, #0
- beq Lreturn
- stmfd sp!, { r5, r6 }
-LLoop:
- mov r5, lr
- ldmia a!, { r8, r9, ip, lr }
- umull r6, r7, w, r8
- umull r8, r9, w, r9
+ beq return
+ stmfd sp!, { r4-r7 }
+addmul_loop:
+ ldmia p, { r5, r6, r7, r8 }
+ adds r4, ip, r5
+ adc r5, z, #0
+ ldmia a!, { r9, r10, ip, lr }
+ umlal r4, r5, w, r9
adds r5, r5, r6
- adcs r6, r7, r8
- adc r7, r9, #0
- umull r8, r9, w, ip
- umull ip, lr, w, lr
+ adc r6, z, #0
+ umlal r5, r6, w, r10
+ adds r6, r6, r7
+ adc r7, z, #0
+ umlal r6, r7, w, ip
adds r7, r7, r8
- adcs r8, r9, ip
- adc lr, lr, #0
- ldmia p!, { r9, ip }
- adds r5, r5, r9
- adcs r6, r6, ip
- ldmia p!, { r9, ip }
- adcs r7, r7, r9
- adcs r8, r8, ip
- adc lr, lr, #0
- stmdb p, { r5, r6, r7, r8 }
+ adc ip, z, #0
+ umlal r7, ip, w, lr
subs n, n, #1
- bne LLoop
- ldmfd sp!, { r5, r6 }
-Lreturn:
- mov r0, lr
- ldmfd sp!, { r7, r8, r9, pc }^
-Lend:
- .size __gmpn_addmul_1, Lend - __gmpn_addmul_1
+ stmia p!, { r4, r5, r6, r7 }
+ bne addmul_loop
+ ldmfd sp!, { r4-r7 }
+return:
+ mov r0, ip
+ ldmfd sp!, { r8-r11, pc }
+end:
+ .size __gmpn_addmul_1, end - __gmpn_addmul_1
diff --git a/mpn/arm/mul_1.S b/mpn/arm/mul_1.S
index 4ef4d8393..06f145267 100644
--- a/mpn/arm/mul_1.S
+++ b/mpn/arm/mul_1.S
@@ -26,7 +26,7 @@
#define n r2
#define w r3
-#define sl r10 /* Do not use, reserved for PIC. */
+#define sl r10
#define fp r11
#define ip r12
#define sp r13
@@ -38,45 +38,44 @@
.global __gmpn_mul_1
.type __gmpn_mul_1,%function
__gmpn_mul_1:
- stmfd sp!, { r7, r8, r9, lr }
- mov lr, #0
- movs n, n, lsr #1
- bcc Lskip1
- ldmia a!, { lr }
- umull ip, lr, w, lr
- stmia p!, { ip }
-Lskip1:
- movs n, n, lsr #1
- bcc Lskip2
- ldmia a!, { r9, ip }
- umull r7, r8, w, r9
- umull r9, ip, w, ip
- adds r7, r7, lr
- adcs r8, r8, r9
- adc lr, ip, #0
- stmia p!, { r7, r8 }
-Lskip2:
- teq n, #0
- beq Lreturn
- stmfd sp!, { r5, r6 }
-LLoop:
- ldmia a!, { r7, r8, r9, ip }
- umull r5, r6, w, r7
- umull r7, r8, w, r8
- adds r5, r5, lr
- adcs r6, r6, r7
- adc r7, r8, #0
- umull lr, r8, w, r9
- umull r9, ip, w, ip
- adds r7, r7, lr
- adcs r8, r8, r9
- adc lr, ip, #0
- stmia p!, { r5, r6, r7, r8 }
- subs n, n, #1
- bne LLoop
- ldmfd sp!, { r5, r6 }
-Lreturn:
- mov r0, lr
- ldmfd sp!, { r7, r8, r9, pc }^
-Lend:
- .size __gmpn_mul_1, Lend - __gmpn_mul_1
+ stmfd sp!, { r8, r9, lr }
+ ands ip, n, #1
+ beq skip1
+ ldr lr, [a], #4
+ umull r9, ip, w, lr
+ str r9, [p], #4
+skip1:
+ tst n, #2
+ beq skip2
+ mov r8, ip
+ ldmia a!, { ip, lr }
+ mov r9, #0
+ umlal r8, r9, w, ip
+ mov ip, #0
+ umlal r9, ip, w, lr
+ stmia p!, { r8, r9 }
+skip2:
+ bics n, n, #3
+ beq return
+ stmfd sp!, { r6, r7 }
+mul_1_loop:
+ mov r6, ip
+ ldmia a!, { r8, r9, ip, lr }
+ ldr r7, [p] /* Bring stuff into cache. */
+ mov r7, #0
+ umlal r6, r7, w, r8
+ mov r8, #0
+ umlal r7, r8, w, r9
+ mov r9, #0
+ umlal r8, r9, w, ip
+ mov ip, #0
+ umlal r9, ip, w, lr
+ subs n, n, #4
+ stmia p!, { r6, r7, r8, r9 }
+ bne mul_1_loop
+ ldmfd sp!, { r6, r7 }
+return:
+ mov r0, ip
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_mul_1, end - __gmpn_mul_1
diff --git a/mpn/arm/sub_n.S b/mpn/arm/sub_n.S
index 32a0dbc31..125a3cdf1 100644
--- a/mpn/arm/sub_n.S
+++ b/mpn/arm/sub_n.S
@@ -20,13 +20,13 @@
@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
@ MA 02111-1307, USA.
-
+
#define d r0
#define a r1
#define b r2
#define n r3
-#define sl r10 /* Do not use, reserved for PIC. */
+#define sl r10
#define fp r11
#define ip r12
#define sp r13
@@ -41,38 +41,39 @@ __gmpn_sub_n:
stmfd sp!, { r8, r9, lr }
subs ip, ip, ip
tst n, #1
- beq Lskip1
+ beq skip1
ldr ip, [a], #4
ldr lr, [b], #4
subs ip, ip, lr
str ip, [d], #4
-Lskip1:
+skip1:
tst n, #2
- beq Lskip2
+ beq skip2
ldmia a!, { r8, r9 }
ldmia b!, { ip, lr }
sbcs r8, r8, ip
sbcs r9, r9, lr
stmia d!, { r8, r9 }
-Lskip2:
+skip2:
bics n, n, #3
- beq Lreturn
+ beq return
stmfd sp!, { r4, r5, r6, r7 }
-LLoop:
+sub_n_loop:
ldmia a!, { r4, r5, r6, r7 }
ldmia b!, { r8, r9, ip, lr }
sbcs r4, r4, r8
+ ldr r8, [d] /* Bring stuff into cache. */
sbcs r5, r5, r9
sbcs r6, r6, ip
sbcs r7, r7, lr
stmia d!, { r4, r5, r6, r7 }
sub n, n, #4
teq n, #0
- bne LLoop
+ bne sub_n_loop
ldmfd sp!, { r4, r5, r6, r7 }
-Lreturn:
+return:
sbc r0, r0, r0
and r0, r0, #1
- ldmfd sp!, { r8, r9, pc }^
-Lend:
- .size __gmpn_sub_n, Lend - __gmpn_sub_n
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_sub_n, end - __gmpn_sub_n