summaryrefslogtreecommitdiff
path: root/gcc/config/arm/lib1funcs.asm
diff options
context:
space:
mode:
author erich <erich@138bc75d-0d04-0410-961f-82ee72b054a4> 1996-01-19 10:11:00 +0000
committer erich <erich@138bc75d-0d04-0410-961f-82ee72b054a4> 1996-01-19 10:11:00 +0000
commit e5e8e444a5724691e31d97e16b45ea12efdf059c (patch)
tree e7273b9707b412666712c0271a514bf4999b7cac /gcc/config/arm/lib1funcs.asm
parent 65f9689aed4ac0a791a5df3047a998c1e790e0d9 (diff)
download gcc-e5e8e444a5724691e31d97e16b45ea12efdf059c.tar.gz
arm/lib1funcs.asm (__divsi3, __modsi3, __udivsi3, __umodsi3): Replace
with smaller, faster versions.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@11070 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/arm/lib1funcs.asm')
-rw-r--r-- gcc/config/arm/lib1funcs.asm 1824
1 files changed, 304 insertions, 1520 deletions
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm
index 5490e25f2ff..c2db824738f 100644
--- a/gcc/config/arm/lib1funcs.asm
+++ b/gcc/config/arm/lib1funcs.asm
@@ -1,9 +1,7 @@
@ libgcc1 routines for ARM cpu.
-@ Division and remainder, from Appendix E of the Sparc Version 8
-@ Architecture Manual, with fixes from Gordon Irlam.
-@ Rewritten for the ARM by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
-/* Copyright (C) 1995 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -35,243 +33,13 @@ Boston, MA 02111-1307, USA. */
This exception does not however invalidate any other reasons why
the executable file might be covered by the GNU General Public License. */
-/*
- * Input: dividend and divisor in r0 and r1 respectively.
- *
- * m4 parameters:
- * NAME name of function to generate
- * OP OP=div => r0 / r1; OP=mod => r0 % r1
- * S S=true => signed; S=false => unsigned
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top `decade' of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
-
-/*
-define(N, `4')dnl
-define(WORDSIZE, `32')dnl
-define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
-dnl
-define(dividend, `r0')dnl
-define(divisor, `r1')dnl
-define(Q, `r2')dnl
-define(R, `r3')dnl
-define(ITER, `ip')dnl
-define(V, `lr')dnl
-dnl
-dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
-define(T, `r4')dnl
-define(SC, `r5')dnl
-ifelse(S, `true', `define(SIGN, `r6')')dnl
-define(REGLIST, `ifelse(S, `true', `{r4, r5, r6,', `{r4, r5,')')dnl
-define(ret, `ldmia sp!, REGLIST pc}')dnl
-dnl
-dnl This is the recursive definition for developing quotient digits.
-dnl
-dnl Parameters:
-dnl $1 the current depth, 1 <= $1 <= N
-dnl $2 the current accumulation of quotient bits
-dnl N max depth
-dnl
-dnl We add a new bit to $2 and either recurse or insert the bits in
-dnl the quotient. R, Q, and V are inputs and outputs as defined above;
-dnl the condition codes are expected to reflect the input R, and are
-dnl modified to reflect the output R.
-dnl
-define(DEVELOP_QUOTIENT_BITS,
-` @ depth $1, accumulated bits $2
- mov V, V, lsr #1
- blt L.$1.eval(2^N+$2+999)
- @ remainder is positive
- subs R, R, V
- ifelse($1, N,
- ` ifelse(eval(2*$2+1<0), `0',
- `add Q, Q, `#'eval($2*2+1)',
- `sub Q, Q, `#'eval(-($2*2+1))')
-
- b 9f
- ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
-L.$1.eval(2^N+$2+999):
- @ remainder is negative
- adds R, R, V
- ifelse($1, N,
- ` ifelse(eval(2*$2-1<0), `0',
- `add Q, Q, `#'eval($2*2-1)',
- `sub Q, Q, `#'eval(-($2*2-1))')
- b 9f
-
- ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
- ifelse($1, 1, `9:')')dnl
-
-#include "trap.h"
-
-ip .req r12
-sp .req r13
-lr .req r14
-pc .req r15
-.text
- .globl NAME
- .align 0
-NAME:
- stmdb sp!, REGLIST lr}
-ifelse(S, `true',
-` @ compute sign of result; if neither is negative, no problem
- ifelse(OP, `div', `eor SIGN, divisor, dividend @ compute sign',
- `mov SIGN, dividend')
- cmp divisor, #0
- rsbmi divisor, divisor, #0
- beq Ldiv_zero
- mov V, divisor
- movs R, dividend
- rsbmi R, R, #0 @ make dividend nonnegative
-',
-` @ Ready to divide. Compute size of quotient; scale comparand.
- movs V, divisor
- mov R, dividend
- beq Ldiv_zero
-')
-
- cmp R, V @ if divisor exceeds dividend, done
- mov Q, #0
- bcc Lgot_result @ (and algorithm fails otherwise)
- mov T, `#'(1 << (WORDSIZE - TOPBITS - 1))
- cmp R, T
- mov ITER, #0
- bcc Lnot_really_big
-
- @ `Here the dividend is >= 2^(31-N) or so. We must be careful here,
- @ as our usual N-at-a-shot divide step will cause overflow and havoc.
- @ The number of bits in the result here is N*ITER+SC, where SC <= N.
- @ Compute ITER in an unorthodox manner: know we need to shift V into
- @ the top decade: so do not even bother to compare to R.'
- mov SC, #1
- 1:
- cmp V, T
- bcs 3f
- mov V, V, lsl `#'N
- add ITER, ITER, #1
- b 1b
-
- @ Now compute SC.
- 2: adds V, V, V
- add SC, SC, #1
- bcc Lnot_too_big
-
- @ We get here if the divisor overflowed while shifting.
- @ This means that R has the high-order bit set.
- @ Restore V and subtract from R.
- mov T, T, lsl `#'TOPBITS
- mov V, V, lsr #1
- add V, T, V
- sub SC, SC, #1
- b Ldo_single_div
-
- Lnot_too_big:
- 3: cmp V, R
- bcc 2b
-@ beq Ldo_single_div
-
- /-* NB: these are commented out in the V8-Sparc manual as well *-/
- /-* (I do not understand this) *-/
- @ V > R: went too far: back up 1 step
- @ srl V, 1, V
- @ dec SC
- @ do single-bit divide steps
- @
- @ We have to be careful here. We know that R >= V, so we can do the
- @ first divide step without thinking. BUT, the others are conditional,
- @ and are only done if R >= 0. Because both R and V may have the high-
- @ order bit set in the first step, just falling into the regular
- @ division loop will mess up the first time around.
- @ So we unroll slightly...
- Ldo_single_div:
- subs SC, SC, #1
- blt Lend_regular_divide
- sub R, R, V
- mov Q, #1
- b Lend_single_divloop
- Lsingle_divloop:
- cmp R, #0
- mov Q, Q, lsl #1
- mov V, V, lsr #1
- @ R >= 0
- subpl R, R, V
- addpl Q, Q, #1
- @ R < 0
- addmi R, R, V
- submi Q, Q, #1
- Lend_single_divloop:
- subs SC, SC, #1
- bge Lsingle_divloop
- b Lend_regular_divide
-
-1:
- add ITER, ITER, #1
-Lnot_really_big:
- mov V, V, lsl `#'N
- cmp V, R
- bls 1b
- @
- @ HOW CAN ITER EVER BE -1 HERE ?????
- @
- cmn ITER, #1
- beq Lgot_result
-
-Ldivloop:
- cmp R, #0 @ set up for initial iteration
- mov Q, Q, lsl `#'N
- DEVELOP_QUOTIENT_BITS(1, 0)
-Lend_regular_divide:
- subs ITER, ITER, #1
- bge Ldivloop
- cmp R, #0
- @ non-restoring fixup here (one instruction only!)
-ifelse(OP, `div',
-` sublt Q, Q, #1
-', ` addlt R, divisor, R
-')
-
-Lgot_result:
-ifelse(S, `true',
-` @ check to see if answer should be < 0
- cmp SIGN, #0
- ifelse(OP, `div', `rsbmi Q, Q, #0', `rsbmi R, R, #0')
-')
- ifelse(OP, `div', `mov r0, Q', `mov r0, R')
- ret
-
-Ldiv_zero:
- @ Divide by zero trap. If it returns, return 0 (about as
- @ wrong as possible, but that is what SunOS does...).
- bl ___div0
- mov r0, #0
- ret
-*/
-
#ifdef __APCS_26__
#define RET movs
+#define RETc(x) mov##x##s
#define RETCOND ^
#else
#define RET mov
+#define RETc(x) mov##x
#define RETCOND
#endif
@@ -290,1323 +58,339 @@ Ldiv_zero:
#ifdef L_udivsi3
-ip .req r12
-sp .req r13
-lr .req r14
-pc .req r15
-.text
+dividend .req r0 @ In: numerator; reduced in place by the division loop
+divisor .req r1 @ In: denominator; shifted up, then stepped back down
+result .req r2 @ Quotient bits accumulated so far; copied to r0 on exit
+curbit .req r3 @ Single quotient bit matching the current divisor shift
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+ .text
.globl SYM (__udivsi3)
.align 0
-SYM (__udivsi3):
- stmdb sp!, {r4, r5, lr}
- @ Ready to divide. Compute size of quotient; scale comparand.
- movs lr, r1
- mov r3, r0
- beq Ldiv_zero
-
-
- cmp r3, lr @ if r1 exceeds r0, done
- mov r2, #0
- bcc Lgot_result @ (and algorithm fails otherwise)
- mov r4, #(1 << (32 - 4 - 1))
- cmp r3, r4
- mov ip, #0
- bcc Lnot_really_big
-
- @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
- @ as our usual N-at-a-shot divide step will cause overflow and havoc.
- @ The number of bits in the result here is N*ITER+SC, where SC <= N.
- @ Compute ITER in an unorthodox manner: know we need to shift V into
- @ the top decade: so do not even bother to compare to R.
- mov r5, #1
- 1:
- cmp lr, r4
- bcs 3f
- mov lr, lr, lsl #4
- add ip, ip, #1
- b 1b
-
- @ Now compute r5.
- 2: adds lr, lr, lr
- add r5, r5, #1
- bcc Lnot_too_big
-
- @ We get here if the r1 overflowed while shifting.
- @ This means that r3 has the high-order bit set.
- @ Restore lr and subtract from r3.
- mov r4, r4, lsl #4
- mov lr, lr, lsr #1
- add lr, r4, lr
- sub r5, r5, #1
- b Ldo_single_div
-
- Lnot_too_big:
- 3: cmp lr, r3
- bcc 2b
-@ beq Ldo_single_div
-
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- @ lr > r3: went too far: back up 1 step
- @ srl lr, 1, lr
- @ dec r5
- @ do single-bit divide steps
- @
- @ We have to be careful here. We know that r3 >= lr, so we can do the
- @ first divide step without thinking. BUT, the others are conditional,
- @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
- @ order bit set in the first step, just falling into the regular
- @ division loop will mess up the first time around.
- @ So we unroll slightly...
- Ldo_single_div:
- subs r5, r5, #1
- blt Lend_regular_divide
- sub r3, r3, lr
- mov r2, #1
- b Lend_single_divloop
- Lsingle_divloop:
- cmp r3, #0
- mov r2, r2, lsl #1
- mov lr, lr, lsr #1
- @ r3 >= 0
- subpl r3, r3, lr
- addpl r2, r2, #1
- @ r3 < 0
- addmi r3, r3, lr
- submi r2, r2, #1
- Lend_single_divloop:
- subs r5, r5, #1
- bge Lsingle_divloop
- b Lend_regular_divide
-
-1:
- add ip, ip, #1
-Lnot_really_big:
- mov lr, lr, lsl #4
- cmp lr, r3
- bls 1b
- @
- @ HOW CAN ip EVER BE -1 HERE ?????
- @
- cmn ip, #1
- beq Lgot_result
-
-Ldivloop:
- cmp r3, #0 @ set up for initial iteration
- mov r2, r2, lsl #4
- @ depth 1, accumulated bits 0
- mov lr, lr, lsr #1
- blt L.1.1015
- @ remainder is positive
- subs r3, r3, lr
- @ depth 2, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.2.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.3.1018
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 7
- mov lr, lr, lsr #1
- blt L.4.1022
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #15
-
- b 9f
-
-L.4.1022:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #13
- b 9f
-
-
-
-L.3.1018:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 5
- mov lr, lr, lsr #1
- blt L.4.1020
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #11
-
- b 9f
-
-L.4.1020:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #9
- b 9f
-
-
-
-
-L.2.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.3.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.4.1018
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #7
-
- b 9f
-
-L.4.1018:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #5
- b 9f
-
-
-
-L.3.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.4.1016
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #3
-
- b 9f
-
-L.4.1016:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #1
- b 9f
-
-
-
-
-
-L.1.1015:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 2, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.2.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.3.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.4.1014
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #1
-
- b 9f
-
-L.4.1014:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #3
- b 9f
-
-
-
-L.3.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.4.1012
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #5
-
- b 9f
-
-L.4.1012:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #7
- b 9f
-
-
-
-
-L.2.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.3.1012
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -5
- mov lr, lr, lsr #1
- blt L.4.1010
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #9
-
- b 9f
-
-L.4.1010:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #11
- b 9f
-
-
-
-L.3.1012:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -7
- mov lr, lr, lsr #1
- blt L.4.1008
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #13
-
- b 9f
-
-L.4.1008:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #15
- b 9f
-
-
-
-
-
- 9:
-Lend_regular_divide:
- subs ip, ip, #1
- bge Ldivloop
- cmp r3, #0
- @ non-restoring fixup here (one instruction only!)
- sublt r2, r2, #1
-
+SYM (__udivsi3): @ Unsigned 32-bit divide: r0 = r0 / r1
+ cmp divisor, #0
+ beq Ldiv0 @ Zero divisor: take the __div0 path
+ mov curbit, #1 @ Quotient bit for the unshifted divisor
+ mov result, #0
+ cmp dividend, divisor
+ bcc Lgot_result @ dividend < divisor (unsigned): quotient stays 0
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unrolling in the main
+ @ division loop. Continue shifting while the divisor is
+ @ smaller than the dividend.
+ cmp divisor, #0x10000000 @ Carry clear only if a 4-bit shift cannot overflow
+ cmpcc divisor, dividend @ ... and then test divisor < dividend
+ movcc divisor, divisor, lsl #4
+ movcc curbit, curbit, lsl #4 @ Keep curbit in step with divisor
+ bcc Loop1
+
+Lbignum:
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, #0x80000000 @ Carry clear only if the top bit is still free
+ cmpcc divisor, dividend @ ... and then test divisor < dividend
+ movcc divisor, divisor, lsl #1
+ movcc curbit, curbit, lsl #1 @ Keep curbit in step with divisor
+ bcc Lbignum
+
+Loop3:
+ @ Test for possible subtractions, and note which bits
+ @ are done in the result. On the final pass, this may subtract
+ @ too much from the dividend, but the result will be ok, since the
+ @ "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ subcs dividend, dividend, divisor @ Step 0: subtract when dividend >= divisor
+ orrcs result, result, curbit @ ... and set the matching quotient bit
+ cmp dividend, divisor, lsr #1 @ Steps 1-3: same test against divisor/2, /4, /8
+ subcs dividend, dividend, divisor, lsr #1
+ orrcs result, result, curbit, lsr #1 @ curbit lsr #k is 0 once the bit falls off
+ cmp dividend, divisor, lsr #2
+ subcs dividend, dividend, divisor, lsr #2
+ orrcs result, result, curbit, lsr #2
+ cmp dividend, divisor, lsr #3
+ subcs dividend, dividend, divisor, lsr #3
+ orrcs result, result, curbit, lsr #3
+ cmp dividend, #0 @ Early termination?
+ movnes curbit, curbit, lsr #4 @ No, any more bits to do?
+ movne divisor, divisor, lsr #4
+ bne Loop3 @ Stop when remainder is 0 or curbit has shifted out
Lgot_result:
+ mov r0, result
+ RET pc, lr @ RET is mov or movs, chosen by __APCS_26__
- mov r0, r2
- ldmia sp!, {r4, r5, pc}RETCOND
-
-Ldiv_zero:
- @ Divide by zero trap. If it returns, return 0 (about as
- @ wrong as possible, but that is what SunOS does...).
+Ldiv0: @ Divide-by-zero path: call __div0, return 0 if it comes back
+ str lr, [sp, #-4]! @ Push the return address; bl overwrites lr
 bl SYM (__div0)
- mov r0, #0
- ldmia sp!, {r4, r5, pc}RETCOND
+ mov r0, #0 @ about as wrong as it could be
+ ldmia sp!, {pc}RETCOND @ Pop the saved return address straight into pc
#endif /* L_udivsi3 */
-#ifdef L_divsi3
-
-ip .req r12
-sp .req r13
-lr .req r14
-pc .req r15
-.text
- .globl SYM (__divsi3)
- .align 0
-SYM (__divsi3):
- stmdb sp!, {r4, r5, r6, lr}
- @ compute sign of result; if neither is negative, no problem
- eor r6, r1, r0 @ compute sign
- cmp r1, #0
- rsbmi r1, r1, #0
- beq Ldiv_zero
- mov lr, r1
- movs r3, r0
- rsbmi r3, r3, #0 @ make dividend nonnegative
-
-
- cmp r3, lr @ if r1 exceeds r0, done
- mov r2, #0
- bcc Lgot_result @ (and algorithm fails otherwise)
- mov r4, #(1 << (32 - 4 - 1))
- cmp r3, r4
- mov ip, #0
- bcc Lnot_really_big
-
- @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
- @ as our usual N-at-a-shot divide step will cause overflow and havoc.
- @ The number of bits in the result here is N*ITER+SC, where SC <= N.
- @ Compute ITER in an unorthodox manner: know we need to shift V into
- @ the top decade: so do not even bother to compare to R.
- mov r5, #1
- 1:
- cmp lr, r4
- bcs 3f
- mov lr, lr, lsl #4
- add ip, ip, #1
- b 1b
-
- @ Now compute r5.
- 2: adds lr, lr, lr
- add r5, r5, #1
- bcc Lnot_too_big
-
- @ We get here if the r1 overflowed while shifting.
- @ This means that r3 has the high-order bit set.
- @ Restore lr and subtract from r3.
- mov r4, r4, lsl #4
- mov lr, lr, lsr #1
- add lr, r4, lr
- sub r5, r5, #1
- b Ldo_single_div
-
- Lnot_too_big:
- 3: cmp lr, r3
- bcc 2b
-@ beq Ldo_single_div
-
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- @ lr > r3: went too far: back up 1 step
- @ srl lr, 1, lr
- @ dec r5
- @ do single-bit divide steps
- @
- @ We have to be careful here. We know that r3 >= lr, so we can do the
- @ first divide step without thinking. BUT, the others are conditional,
- @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
- @ order bit set in the first step, just falling into the regular
- @ division loop will mess up the first time around.
- @ So we unroll slightly...
- Ldo_single_div:
- subs r5, r5, #1
- blt Lend_regular_divide
- sub r3, r3, lr
- mov r2, #1
- b Lend_single_divloop
- Lsingle_divloop:
- cmp r3, #0
- mov r2, r2, lsl #1
- mov lr, lr, lsr #1
- @ r3 >= 0
- subpl r3, r3, lr
- addpl r2, r2, #1
- @ r3 < 0
- addmi r3, r3, lr
- submi r2, r2, #1
- Lend_single_divloop:
- subs r5, r5, #1
- bge Lsingle_divloop
- b Lend_regular_divide
-
-1:
- add ip, ip, #1
-Lnot_really_big:
- mov lr, lr, lsl #4
- cmp lr, r3
- bls 1b
- @
- @ HOW CAN ip EVER BE -1 HERE ?????
- @
- cmn ip, #1
- beq Lgot_result
-
-Ldivloop:
- cmp r3, #0 @ set up for initial iteration
- mov r2, r2, lsl #4
- @ depth 1, accumulated bits 0
- mov lr, lr, lsr #1
- blt L.1.1015
- @ remainder is positive
- subs r3, r3, lr
- @ depth 2, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.2.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.3.1018
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 7
- mov lr, lr, lsr #1
- blt L.4.1022
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #15
-
- b 9f
-
-L.4.1022:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #13
- b 9f
-
-
-
-L.3.1018:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 5
- mov lr, lr, lsr #1
- blt L.4.1020
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #11
-
- b 9f
-
-L.4.1020:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #9
- b 9f
-
-
-
-
-L.2.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.3.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.4.1018
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #7
-
- b 9f
-
-L.4.1018:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #5
- b 9f
-
-
-
-L.3.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.4.1016
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #3
-
- b 9f
-
-L.4.1016:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #1
- b 9f
-
-
-
-
-
-L.1.1015:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 2, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.2.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.3.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.4.1014
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #1
-
- b 9f
-
-L.4.1014:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #3
- b 9f
-
-
-
-L.3.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.4.1012
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #5
-
- b 9f
-
-L.4.1012:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #7
- b 9f
-
-
-
-
-L.2.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.3.1012
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -5
- mov lr, lr, lsr #1
- blt L.4.1010
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #9
-
- b 9f
-
-L.4.1010:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #11
- b 9f
-
-
-
-L.3.1012:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -7
- mov lr, lr, lsr #1
- blt L.4.1008
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #13
-
- b 9f
-
-L.4.1008:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #15
- b 9f
-
-
-
-
-
- 9:
-Lend_regular_divide:
- subs ip, ip, #1
- bge Ldivloop
- cmp r3, #0
- @ non-restoring fixup here (one instruction only!)
- sublt r2, r2, #1
-
-
-Lgot_result:
- @ check to see if answer should be < 0
- cmp r6, #0
- rsbmi r2, r2, #0
-
- mov r0, r2
- ldmia sp!, {r4, r5, r6, pc}RETCOND
-
-Ldiv_zero:
- @ Divide by zero trap. If it returns, return 0 (about as
- @ wrong as possible, but that is what SunOS does...).
- bl SYM (__div0)
- mov r0, #0
- ldmia sp!, {r4, r5, r6, pc}RETCOND
-
-#endif /* L_divsi3 */
-
#ifdef L_umodsi3
-ip .req r12
-sp .req r13
-lr .req r14
-pc .req r15
-.text
+dividend .req r0 @ In: numerator; reduced in place and returned as the remainder
+divisor .req r1 @ In: denominator; shifted up, then stepped back down
+overdone .req r2 @ Records which shifted subtractions ran on the current pass
+curbit .req r3 @ Single bit tracking the current divisor shift
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+ .text
.globl SYM (__umodsi3)
.align 0
-SYM (__umodsi3):
- stmdb sp!, {r4, r5, lr}
- @ Ready to divide. Compute size of quotient; scale comparand.
- movs lr, r1
- mov r3, r0
- beq Ldiv_zero
-
-
- cmp r3, lr @ if r1 exceeds r0, done
- mov r2, #0
- bcc Lgot_result @ (and algorithm fails otherwise)
- mov r4, #(1 << (32 - 4 - 1))
- cmp r3, r4
- mov ip, #0
- bcc Lnot_really_big
-
- @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
- @ as our usual N-at-a-shot divide step will cause overflow and havoc.
- @ The number of bits in the result here is N*ITER+SC, where SC <= N.
- @ Compute ITER in an unorthodox manner: know we need to shift V into
- @ the top decade: so do not even bother to compare to R.
- mov r5, #1
- 1:
- cmp lr, r4
- bcs 3f
- mov lr, lr, lsl #4
- add ip, ip, #1
- b 1b
-
- @ Now compute r5.
- 2: adds lr, lr, lr
- add r5, r5, #1
- bcc Lnot_too_big
-
- @ We get here if the r1 overflowed while shifting.
- @ This means that r3 has the high-order bit set.
- @ Restore lr and subtract from r3.
- mov r4, r4, lsl #4
- mov lr, lr, lsr #1
- add lr, r4, lr
- sub r5, r5, #1
- b Ldo_single_div
-
- Lnot_too_big:
- 3: cmp lr, r3
- bcc 2b
-@ beq Ldo_single_div
-
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- @ lr > r3: went too far: back up 1 step
- @ srl lr, 1, lr
- @ dec r5
- @ do single-bit divide steps
- @
- @ We have to be careful here. We know that r3 >= lr, so we can do the
- @ first divide step without thinking. BUT, the others are conditional,
- @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
- @ order bit set in the first step, just falling into the regular
- @ division loop will mess up the first time around.
- @ So we unroll slightly...
- Ldo_single_div:
- subs r5, r5, #1
- blt Lend_regular_divide
- sub r3, r3, lr
- mov r2, #1
- b Lend_single_divloop
- Lsingle_divloop:
- cmp r3, #0
- mov r2, r2, lsl #1
- mov lr, lr, lsr #1
- @ r3 >= 0
- subpl r3, r3, lr
- addpl r2, r2, #1
- @ r3 < 0
- addmi r3, r3, lr
- submi r2, r2, #1
- Lend_single_divloop:
- subs r5, r5, #1
- bge Lsingle_divloop
- b Lend_regular_divide
-1:
- add ip, ip, #1
-Lnot_really_big:
- mov lr, lr, lsl #4
- cmp lr, r3
- bls 1b
- @
- @ HOW CAN ip EVER BE -1 HERE ?????
- @
- cmn ip, #1
- beq Lgot_result
-
-Ldivloop:
- cmp r3, #0 @ set up for initial iteration
- mov r2, r2, lsl #4
- @ depth 1, accumulated bits 0
- mov lr, lr, lsr #1
- blt L.1.1015
- @ remainder is positive
- subs r3, r3, lr
- @ depth 2, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.2.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.3.1018
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 7
- mov lr, lr, lsr #1
- blt L.4.1022
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #15
-
- b 9f
-
-L.4.1022:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #13
- b 9f
-
-
-
-L.3.1018:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 5
- mov lr, lr, lsr #1
- blt L.4.1020
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #11
-
- b 9f
-
-L.4.1020:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #9
- b 9f
-
-
-
-
-L.2.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.3.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.4.1018
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #7
-
- b 9f
-
-L.4.1018:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #5
- b 9f
-
-
-
-L.3.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.4.1016
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #3
-
- b 9f
-
-L.4.1016:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #1
- b 9f
-
-
-
-
-
-L.1.1015:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 2, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.2.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.3.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.4.1014
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #1
-
- b 9f
-
-L.4.1014:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #3
- b 9f
-
-
-
-L.3.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.4.1012
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #5
-
- b 9f
-
-L.4.1012:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #7
- b 9f
-
-
-
-
-L.2.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.3.1012
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -5
- mov lr, lr, lsr #1
- blt L.4.1010
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #9
-
- b 9f
-
-L.4.1010:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #11
- b 9f
+SYM (__umodsi3): @ Unsigned 32-bit remainder: r0 = r0 % r1
+ cmp divisor, #0
+ beq Ldiv0 @ Zero divisor: take the __div0 path
+ mov curbit, #1 @ Marks the unshifted divisor position
+ cmp dividend, divisor
+ RETc(cc) pc, lr @ dividend < divisor: r0 already holds the remainder
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unrolling in the main
+ @ division loop. Continue shifting while the divisor is
+ @ smaller than the dividend.
+ cmp divisor, #0x10000000 @ Carry clear only if a 4-bit shift cannot overflow
+ cmpcc divisor, dividend @ ... and then test divisor < dividend
+ movcc divisor, divisor, lsl #4
+ movcc curbit, curbit, lsl #4 @ Keep curbit in step with divisor
+ bcc Loop1
+
+Lbignum:
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, #0x80000000 @ Carry clear only if the top bit is still free
+ cmpcc divisor, dividend @ ... and then test divisor < dividend
+ movcc divisor, divisor, lsl #1
+ movcc curbit, curbit, lsl #1 @ Keep curbit in step with divisor
+ bcc Lbignum
+
+Loop3:
+ @ Test for possible subtractions. On the final pass, this may
+ @ subtract too much from the dividend, so keep track of which
+ @ subtractions are done, we can fix them up afterwards...
+ mov overdone, #0 @ Reset each pass: only the last pass is kept
+ cmp dividend, divisor
+ subcs dividend, dividend, divisor @ Step 0 uses the unshifted divisor; not recorded
+ cmp dividend, divisor, lsr #1
+ subcs dividend, dividend, divisor, lsr #1
+ orrcs overdone, overdone, curbit, ror #1 @ Wraps into bit 31 only when curbit is 1
+ cmp dividend, divisor, lsr #2
+ subcs dividend, dividend, divisor, lsr #2
+ orrcs overdone, overdone, curbit, ror #2 @ Wraps into the top bits when curbit is 1 or 2
+ cmp dividend, divisor, lsr #3
+ subcs dividend, dividend, divisor, lsr #3
+ orrcs overdone, overdone, curbit, ror #3 @ Wraps into the top bits when curbit is 1, 2 or 4
+ mov ip, curbit @ Keep this pass curbit for the fixups below
+ cmp dividend, #0 @ Early termination?
+ movnes curbit, curbit, lsr #4 @ No, any more bits to do?
+ movne divisor, divisor, lsr #4 @ Skipped on the last pass, so divisor stays as used
+ bne Loop3
+
+ @ Any subtractions that we should not have done will be recorded in
+ @ the top three bits of "overdone". Exactly which were not needed
+ @ are governed by the position of the bit, stored in ip.
+ @ If we terminated early, because dividend became zero,
+ @ then none of the below will match, since the bit in ip will not be
+ @ in the bottom nibble.
+ ands overdone, overdone, #0xe0000000 @ Keep only the wrapped-around records
+ RETc(eq) pc, lr @ No fixups needed
+ tst overdone, ip, ror #3 @ Was the divisor/8 step taken past the bottom?
+ addne dividend, dividend, divisor, lsr #3 @ Yes: add it back
+ tst overdone, ip, ror #2
+ addne dividend, dividend, divisor, lsr #2
+ tst overdone, ip, ror #1
+ addne dividend, dividend, divisor, lsr #1
+ RET pc, lr @ Remainder is returned in r0 (dividend)
-
-
-L.3.1012:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -7
- mov lr, lr, lsr #1
- blt L.4.1008
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #13
+Ldiv0: @ Divide-by-zero path: call __div0, return 0 if it comes back
+ str lr, [sp, #-4]! @ Push the return address; bl overwrites lr
+ bl SYM (__div0)
+ mov r0, #0 @ about as wrong as it could be
+ ldmia sp!, {pc}RETCOND @ Pop the saved return address straight into pc
- b 9f
-
-L.4.1008:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #15
- b 9f
+#endif /* L_umodsi3 */
-
-
-
-
- 9:
-Lend_regular_divide:
- subs ip, ip, #1
- bge Ldivloop
- cmp r3, #0
- @ non-restoring fixup here (one instruction only!)
- addlt r3, r1, r3
+#ifdef L_divsi3
+dividend .req r0 @ In: numerator; made non-negative, then reduced in place
+divisor .req r1 @ In: denominator; made non-negative, shifted up, stepped down
+result .req r2 @ Magnitude of the quotient accumulated so far
+curbit .req r3 @ Single quotient bit matching the current divisor shift
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+ .text
+ .globl SYM (__divsi3)
+ .align 0
+SYM (__divsi3): @ Signed 32-bit divide: r0 = r0 / r1
+ eor ip, dividend, divisor @ Save the sign of the result.
+ mov curbit, #1 @ Quotient bit for the unshifted divisor
+ mov result, #0
+ cmp divisor, #0
+ rsbmi divisor, divisor, #0 @ Loops below use unsigned.
+ beq Ldiv0 @ Flags still come from the cmp; rsbmi does not set them
+ cmp dividend, #0
+ rsbmi dividend, dividend, #0 @ Negate a negative dividend as well
+ cmp dividend, divisor
+ bcc Lgot_result @ Dividend smaller than divisor (unsigned): quotient stays 0
+
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unrolling in the main
+ @ division loop. Continue shifting while the divisor is
+ @ smaller than the dividend.
+ cmp divisor, #0x10000000 @ Carry clear only if a 4-bit shift cannot overflow
+ cmpcc divisor, dividend @ ... and then test divisor < dividend
+ movcc divisor, divisor, lsl #4
+ movcc curbit, curbit, lsl #4 @ Keep curbit in step with divisor
+ bcc Loop1
+
+Lbignum:
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, #0x80000000 @ Carry clear only if the top bit is still free
+ cmpcc divisor, dividend @ ... and then test divisor < dividend
+ movcc divisor, divisor, lsl #1
+ movcc curbit, curbit, lsl #1 @ Keep curbit in step with divisor
+ bcc Lbignum
+
+Loop3:
+ @ Test for possible subtractions, and note which bits
+ @ are done in the result. On the final pass, this may subtract
+ @ too much from the dividend, but the result will be ok, since the
+ @ "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ subcs dividend, dividend, divisor @ Step 0: subtract when dividend >= divisor
+ orrcs result, result, curbit @ ... and set the matching quotient bit
+ cmp dividend, divisor, lsr #1 @ Steps 1-3: same test against divisor/2, /4, /8
+ subcs dividend, dividend, divisor, lsr #1
+ orrcs result, result, curbit, lsr #1 @ curbit lsr #k is 0 once the bit falls off
+ cmp dividend, divisor, lsr #2
+ subcs dividend, dividend, divisor, lsr #2
+ orrcs result, result, curbit, lsr #2
+ cmp dividend, divisor, lsr #3
+ subcs dividend, dividend, divisor, lsr #3
+ orrcs result, result, curbit, lsr #3
+ cmp dividend, #0 @ Early termination?
+ movnes curbit, curbit, lsr #4 @ No, any more bits to do?
+ movne divisor, divisor, lsr #4
+ bne Loop3 @ Stop when remainder is 0 or curbit has shifted out
Lgot_result:
+ mov r0, result
+ cmp ip, #0 @ ip is negative when the operand signs differed
+ rsbmi r0, r0, #0 @ ... in which case negate the quotient
+ RET pc, lr @ RET is mov or movs, chosen by __APCS_26__
- mov r0, r3
- ldmia sp!, {r4, r5, pc}RETCOND
-
-Ldiv_zero:
- @ Divide by zero trap. If it returns, return 0 (about as
- @ wrong as possible, but that is what SunOS does...).
+Ldiv0: @ Divide-by-zero path: call __div0, return 0 if it comes back
+ str lr, [sp, #-4]! @ Push the return address; bl overwrites lr
 bl SYM (__div0)
- mov r0, #0
- ldmia sp!, {r4, r5, pc}RETCOND
+ mov r0, #0 @ about as wrong as it could be
+ ldmia sp!, {pc}RETCOND @ Pop the saved return address straight into pc
-#endif /* L_umodsi3 */
+#endif /* L_divsi3 */
#ifdef L_modsi3
-ip .req r12
-sp .req r13
-lr .req r14
-pc .req r15
-.text
+dividend .req r0 @ Dividend on entry; reduced in place to the remainder.
+divisor .req r1 @ Divisor on entry; shifted up, then back down.
+overdone .req r2 @ Flags for the lsr #1..#3 subtractions of one Loop3 pass.
+curbit .req r3 @ Single bit that is shifted in step with divisor.
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+ .text
.globl SYM (__modsi3)
.align 0
-SYM (__modsi3):
- stmdb sp!, {r4, r5, r6, lr}
- @ compute sign of result; if neither is negative, no problem
- mov r6, r0
- cmp r1, #0
- rsbmi r1, r1, #0
- beq Ldiv_zero
- mov lr, r1
- movs r3, r0
- rsbmi r3, r3, #0 @ make dividend nonnegative
-
- cmp r3, lr @ if r1 exceeds r0, done
- mov r2, #0
- bcc Lgot_result @ (and algorithm fails otherwise)
- mov r4, #(1 << (32 - 4 - 1))
- cmp r3, r4
- mov ip, #0
- bcc Lnot_really_big
-
- @ Here the dividend is >= 2^(31-N) or so. We must be careful here,
- @ as our usual N-at-a-shot divide step will cause overflow and havoc.
- @ The number of bits in the result here is N*ITER+SC, where SC <= N.
- @ Compute ITER in an unorthodox manner: know we need to shift V into
- @ the top decade: so do not even bother to compare to R.
- mov r5, #1
- 1:
- cmp lr, r4
- bcs 3f
- mov lr, lr, lsl #4
- add ip, ip, #1
- b 1b
-
- @ Now compute r5.
- 2: adds lr, lr, lr
- add r5, r5, #1
- bcc Lnot_too_big
-
- @ We get here if the r1 overflowed while shifting.
- @ This means that r3 has the high-order bit set.
- @ Restore lr and subtract from r3.
- mov r4, r4, lsl #4
- mov lr, lr, lsr #1
- add lr, r4, lr
- sub r5, r5, #1
- b Ldo_single_div
-
- Lnot_too_big:
- 3: cmp lr, r3
- bcc 2b
-@ beq Ldo_single_div
-
- /* NB: these are commented out in the V8-Sparc manual as well */
- /* (I do not understand this) */
- @ lr > r3: went too far: back up 1 step
- @ srl lr, 1, lr
- @ dec r5
- @ do single-bit divide steps
- @
- @ We have to be careful here. We know that r3 >= lr, so we can do the
- @ first divide step without thinking. BUT, the others are conditional,
- @ and are only done if r3 >= 0. Because both r3 and lr may have the high-
- @ order bit set in the first step, just falling into the regular
- @ division loop will mess up the first time around.
- @ So we unroll slightly...
- Ldo_single_div:
- subs r5, r5, #1
- blt Lend_regular_divide
- sub r3, r3, lr
- mov r2, #1
- b Lend_single_divloop
- Lsingle_divloop:
- cmp r3, #0
- mov r2, r2, lsl #1
- mov lr, lr, lsr #1
- @ r3 >= 0
- subpl r3, r3, lr
- addpl r2, r2, #1
- @ r3 < 0
- addmi r3, r3, lr
- submi r2, r2, #1
- Lend_single_divloop:
- subs r5, r5, #1
- bge Lsingle_divloop
- b Lend_regular_divide
-
-1:
- add ip, ip, #1
-Lnot_really_big:
- mov lr, lr, lsl #4
- cmp lr, r3
- bls 1b
- @
- @ HOW CAN ip EVER BE -1 HERE ?????
- @
- cmn ip, #1
+SYM (__modsi3): @ Signed remainder: dividend in r0, divisor in r1.
+ mov curbit, #1 @ Bit 0 stands for the unshifted divisor.
+ cmp divisor, #0
+ rsbmi divisor, divisor, #0 @ Loops below use unsigned.
+ beq Ldiv0 @ Flags are still those of the cmp above.
+ @ The sign of the dividend must be kept for the end, but ip is needed
+ @ later on, so push the dividend; this is faster than pushing lr.
+ str dividend, [sp, #-4]! @ Popped again at Lgot_result.
+ cmp dividend, #0
+ rsbmi dividend, dividend, #0 @ Work on the negated value if negative.
+ cmp dividend, divisor
+ bcc Lgot_result @ Dividend below divisor: it is the remainder.
+
+Loop1:
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is no
+ @ smaller than the dividend, or has reached 0x10000000.
+ cmp divisor, #0x10000000
+ cmpcc divisor, dividend @ Only compared while below 0x10000000.
+ movcc divisor, divisor, lsl #4
+ movcc curbit, curbit, lsl #4 @ Keep curbit in step with divisor.
+ bcc Loop1
+
+Lbignum:
+ @ A very big divisor must be shifted one bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, #0x80000000
+ cmpcc divisor, dividend @ Only compared while bit 31 is clear.
+ movcc divisor, divisor, lsl #1
+ movcc curbit, curbit, lsl #1
+ bcc Lbignum
+
+Loop3:
+ @ Test for possible subtractions. On the final pass, this may
+ @ subtract too much from the dividend, so keep track of which
+ @ subtractions are done; we can fix them up afterwards...
+ mov overdone, #0 @ Restarted on every pass.
+ cmp dividend, divisor
+ subcs dividend, dividend, divisor
+ cmp dividend, divisor, lsr #1
+ subcs dividend, dividend, divisor, lsr #1
+ orrcs overdone, overdone, curbit, ror #1 @ Note the lsr #1 subtraction.
+ cmp dividend, divisor, lsr #2
+ subcs dividend, dividend, divisor, lsr #2
+ orrcs overdone, overdone, curbit, ror #2 @ Note the lsr #2 subtraction.
+ cmp dividend, divisor, lsr #3
+ subcs dividend, dividend, divisor, lsr #3
+ orrcs overdone, overdone, curbit, ror #3 @ Note the lsr #3 subtraction.
+ mov ip, curbit @ Remember the bit position of this pass.
+ cmp dividend, #0 @ Early termination?
+ movnes curbit, curbit, lsr #4 @ No, any more bits to do?
+ movne divisor, divisor, lsr #4
+ bne Loop3
+
+ @ Bogus final-pass subtractions are flagged in the top three bits of
+ @ overdone; the bit position in ip tells which ones they were. After
+ @ an early exit ip may lie outside the bottom nibble, and a bit in its
+ @ top two positions could still match the smaller RORs below, so
+ @ test ip explicitly and skip the fixups unless it is one of bits 0-2.
+ ands overdone, overdone, #0xe0000000
+ tstne ip, #0x7
beq Lgot_result
-
-Ldivloop:
- cmp r3, #0 @ set up for initial iteration
- mov r2, r2, lsl #4
- @ depth 1, accumulated bits 0
- mov lr, lr, lsr #1
- blt L.1.1015
- @ remainder is positive
- subs r3, r3, lr
- @ depth 2, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.2.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.3.1018
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 7
- mov lr, lr, lsr #1
- blt L.4.1022
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #15
-
- b 9f
-
-L.4.1022:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #13
- b 9f
-
-
-
-L.3.1018:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 5
- mov lr, lr, lsr #1
- blt L.4.1020
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #11
-
- b 9f
-
-L.4.1020:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #9
- b 9f
-
-
-
-
-L.2.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.3.1016
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits 3
- mov lr, lr, lsr #1
- blt L.4.1018
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #7
-
- b 9f
-
-L.4.1018:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #5
- b 9f
-
-
-
-L.3.1016:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits 1
- mov lr, lr, lsr #1
- blt L.4.1016
- @ remainder is positive
- subs r3, r3, lr
- add r2, r2, #3
-
- b 9f
-
-L.4.1016:
- @ remainder is negative
- adds r3, r3, lr
- add r2, r2, #1
- b 9f
-
-
-
-
-
-L.1.1015:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 2, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.2.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 3, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.3.1014
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -1
- mov lr, lr, lsr #1
- blt L.4.1014
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #1
-
- b 9f
-
-L.4.1014:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #3
- b 9f
-
-
-
-L.3.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.4.1012
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #5
-
- b 9f
-
-L.4.1012:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #7
- b 9f
-
-
-
-
-L.2.1014:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 3, accumulated bits -3
- mov lr, lr, lsr #1
- blt L.3.1012
- @ remainder is positive
- subs r3, r3, lr
- @ depth 4, accumulated bits -5
- mov lr, lr, lsr #1
- blt L.4.1010
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #9
-
- b 9f
-
-L.4.1010:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #11
- b 9f
-
-
-
-L.3.1012:
- @ remainder is negative
- adds r3, r3, lr
- @ depth 4, accumulated bits -7
- mov lr, lr, lsr #1
- blt L.4.1008
- @ remainder is positive
- subs r3, r3, lr
- sub r2, r2, #13
-
- b 9f
-
-L.4.1008:
- @ remainder is negative
- adds r3, r3, lr
- sub r2, r2, #15
- b 9f
-
-
-
-
-
- 9:
-Lend_regular_divide:
- subs ip, ip, #1
- bge Ldivloop
- cmp r3, #0
- @ non-restoring fixup here (one instruction only!)
- addlt r3, r1, r3
-
-
+ tst overdone, ip, ror #3 @ Was the lsr #3 subtraction flagged?
+ addne dividend, dividend, divisor, lsr #3 @ Yes, add it back.
+ tst overdone, ip, ror #2 @ Likewise for lsr #2 ...
+ addne dividend, dividend, divisor, lsr #2
+ tst overdone, ip, ror #1 @ ... and for lsr #1.
+ addne dividend, dividend, divisor, lsr #1
Lgot_result:
- @ check to see if answer should be < 0
- cmp r6, #0
- rsbmi r3, r3, #0
-
- mov r0, r3
- ldmia sp!, {r4, r5, r6, pc}RETCOND
+ ldr ip, [sp], #4 @ Pop the original dividend pushed on entry.
+ cmp ip, #0
+ rsbmi dividend, dividend, #0 @ Negate the result if the dividend was negative.
+ RET pc, lr
-Ldiv_zero:
- @ Divide by zero trap. If it returns, return 0 (about as
- @ wrong as possible, but that is what SunOS does...).
+Ldiv0:
+ str lr, [sp, #-4]! @ Push lr; the bl below overwrites it.
bl SYM (__div0)
- mov r0, #0
- ldmia sp!, {r4, r5, r6, pc}RETCOND
+ mov r0, #0 @ If __div0 returns, answer 0: about as wrong as it could be.
+ ldmia sp!, {pc}RETCOND @ Pop the pushed lr straight into pc.
#endif /* L_modsi3 */