summaryrefslogtreecommitdiff
path: root/libgcc/config/avr/lib1funcs.S
diff options
context:
space:
mode:
Diffstat (limited to 'libgcc/config/avr/lib1funcs.S')
-rw-r--r--libgcc/config/avr/lib1funcs.S423
1 files changed, 273 insertions, 150 deletions
diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 95a7d3d4eeb..6b9879ee7d7 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -91,6 +91,35 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.endfunc
.endm
+;; Negate (two's complement), in place, the 2-byte value held in \reg (low byte) and \reg+1 (high byte)
+.macro NEG2 reg
+ com \reg+1 ; high = ~high
+ neg \reg ; low = -low; Carry is set iff the result is non-zero
+ sbci \reg+1, -1 ; high = ~high + 1 - Carry: the +1 reaches the high byte only if low is 0
+.endm
+
+;; Negate (two's complement), in place, the 4-byte value held in \reg (lowest byte) ... \reg+3 (highest byte)
+.macro NEG4 reg
+ com \reg+3 ; invert the three upper bytes first
+ com \reg+2
+ com \reg+1
+.if \reg >= 16
+ neg \reg ; register number >= 16, SBCI is usable: low = -low, Carry iff low != 0
+ sbci \reg+1, -1 ; byte = ~byte + 1 - Carry; the borrow ripples upwards
+ sbci \reg+2, -1
+ sbci \reg+3, -1
+.else
+ com \reg ; no SBCI on these registers: invert the low byte too; COM always sets Carry
+ adc \reg, __zero_reg__ ; add that Carry, giving ~x + 1 (assumes __zero_reg__ holds 0)
+ adc \reg+1, __zero_reg__ ; ripple the carry through the upper bytes
+ adc \reg+2, __zero_reg__
+ adc \reg+3, __zero_reg__
+.endif
+.endm
+
+#define exp_lo(N) hlo8 ((N) << 23) /* bits 16..23 of (N << 23) */
+#define exp_hi(N) hhi8 ((N) << 23) /* bits 24..31 of (N << 23) */
+
.section .text.libgcc.mul, "ax", @progbits
@@ -126,175 +155,246 @@ ENDF __mulqi3
#endif /* defined (L_mulqi3) */
-#if defined (L_mulqihi3)
-DEFUN __mulqihi3
- clr r25
- sbrc r24, 7
- dec r25
- clr r23
- sbrc r22, 7
- dec r22
- XJMP __mulhi3
-ENDF __mulqihi3:
-#endif /* defined (L_mulqihi3) */
+
+/*******************************************************
+ Widening Multiplication 16 = 8 x 8 without MUL
+ Multiplication 16 x 16 without MUL
+*******************************************************/
+
+#define A0 r22 /* operand A, low byte */
+#define A1 r23 /* operand A, high byte */
+#define B0 r24 /* operand B, low byte */
+#define BB0 r20 /* copy of B0 kept by __mulqihi3 for its sign correction */
+#define B1 r25 /* operand B, high byte */
+;; Output overlaps input (C is in the same registers as B), thus expand result in CC0/1
+#define C0 r24 /* result, low byte */
+#define C1 r25 /* result, high byte */
+#define CC0 __tmp_reg__ /* accumulator, low byte */
+#define CC1 R21 /* accumulator, high byte */
#if defined (L_umulqihi3)
+;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
+;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
+;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
- clr r25
- clr r23
- XJMP __mulhi3
+ clr A1 ; zero-extend A0 to A1:A0
+ clr B1 ; zero-extend B0 to B1:B0
+ XJMP __mulhi3 ; no code follows: the 16-bit product is computed by __mulhi3
ENDF __umulqihi3
-#endif /* defined (L_umulqihi3) */
+#endif /* L_umulqihi3 */
-/*******************************************************
- Multiplication 16 x 16 without MUL
-*******************************************************/
-#if defined (L_mulhi3)
-#define r_arg1L r24 /* multiplier Low */
-#define r_arg1H r25 /* multiplier High */
-#define r_arg2L r22 /* multiplicand Low */
-#define r_arg2H r23 /* multiplicand High */
-#define r_resL __tmp_reg__ /* result Low */
-#define r_resH r21 /* result High */
+#if defined (L_mulqihi3)
+;;; R25:R24 = (signed int) R22 * (signed int) R24
+;;; (C1:C0) = (signed int) A0 * (signed int) B0
+;;; Clobbers: __tmp_reg__, R20..R23
+DEFUN __mulqihi3
+ ;; Sign-extend B0
+ clr B1
+ sbrc B0, 7 ; skip the COM if B0 >= 0
+ com B1 ; B0 < 0: B1 = 0xff
+ ;; The multiplication runs twice as fast if A1 is zero, thus:
+ ;; Zero-extend A0 (for A0 < 0 this is off by 256, corrected after the multiplication)
+ clr A1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Store B0 * sign of A: BB0 = (A0 < 0) ? B0 : 0
+ clr BB0
+ sbrc A0, 7 ; skip the MOV if A0 >= 0
+ mov BB0, B0 ; B0 must be saved: __mulhi3 shifts it and returns C0 in the same register
+ call __mulhi3
+#else /* have no CALL */
+ ;; Skip sign-extension of A if A >= 0
+ ;; Same size as with the first alternative but avoids errata skip
+ ;; and is faster if A >= 0
+ sbrs A0, 7 ; A0 < 0: skip the RJMP
+ rjmp __mulhi3 ; A0 >= 0: no correction needed, __mulhi3 returns to our caller
+ ;; If A < 0 store B
+ mov BB0, B0
+ rcall __mulhi3
+#endif /* HAVE_JMP_CALL */
+ ;; 1-extend A after the multiplication: the zero-extended A was 256 too large for A0 < 0
+ sub C1, BB0 ; remove the surplus 256 * B0 from the product's high byte
+ ret
+ENDF __mulqihi3
+#endif /* L_mulqihi3 */
+#if defined (L_mulhi3)
+;;; R25:R24 = R23:R22 * R25:R24 (16-bit result, computed by shift-and-add)
+;;; (C1:C0) = (A1:A0) * (B1:B0)
+;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __mulhi3
- clr r_resH ; clear result
- clr r_resL ; clear result
-__mulhi3_loop:
- sbrs r_arg1L,0
- rjmp __mulhi3_skip1
- add r_resL,r_arg2L ; result + multiplicand
- adc r_resH,r_arg2H
-__mulhi3_skip1:
- add r_arg2L,r_arg2L ; shift multiplicand
- adc r_arg2H,r_arg2H
-
- cp r_arg2L,__zero_reg__
- cpc r_arg2H,__zero_reg__
- breq __mulhi3_exit ; while multiplicand != 0
-
- lsr r_arg1H ; gets LSB of multiplier
- ror r_arg1L
- sbiw r_arg1L,0
- brne __mulhi3_loop ; exit if multiplier = 0
-__mulhi3_exit:
- mov r_arg1H,r_resH ; result to return register
- mov r_arg1L,r_resL
- ret
-ENDF __mulhi3
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg2L
-#undef r_arg2H
-#undef r_resL
-#undef r_resH
+ ;; Clear result (accumulated in CC because C shares its registers with B)
+ clr CC0
+ clr CC1
+ rjmp 3f ; enter the loop at the "B == 0" test
+1:
+ ;; Bit n of A is 1 --> C += B << n
+ add CC0, B0
+ adc CC1, B1
+2:
+ lsl B0 ; B <<= 1 for the next bit of A
+ rol B1
+3:
+ ;; If B == 0 we are ready: no further bit of A can contribute to the 16-bit result
+ sbiw B0, 0 ; subtracting 0 leaves B unchanged and sets Z iff B1:B0 == 0
+ breq 9f
+
+ ;; Carry = n-th bit of A
+ lsr A1
+ ror A0
+ ;; If bit n of A is set, then go add B * 2^n to C
+ brcs 1b
+
+ ;; Carry = 0 --> The ROR above acts like CP A0, 0
+ ;; Thus, it is sufficient to CPC the high part to test A against 0
+ cpc A1, __zero_reg__ ; Z stays set only if A0 == 0 and A1 == 0
+ ;; Only proceed if A != 0
+ brne 2b
+9:
+ ;; Move Result into place
+ mov C0, CC0
+ mov C1, CC1
+ ret
+ENDF __mulhi3
+#endif /* L_mulhi3 */
-#endif /* defined (L_mulhi3) */
+#undef A0
+#undef A1
+#undef B0
+#undef BB0
+#undef B1
+#undef C0
+#undef C1
+#undef CC0
+#undef CC1
+
+
+#define A0 22 /* operand A occupies R22..R25, A0 is the least significant byte */
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 18 /* operand B occupies R18..R21, B0 is the least significant byte */
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define CC0 26 /* accumulator CC, low word: R27:R26 */
+#define CC1 CC0+1
+#define CC2 30 /* accumulator CC, high word: R31:R30 */
+#define CC3 CC2+1
+
+#define C0 22 /* result C occupies R22..R25, the same registers as A */
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
/*******************************************************
Widening Multiplication 32 = 16 x 16 without MUL
*******************************************************/
-#if defined (L_mulhisi3)
-DEFUN __mulhisi3
-;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- sbrc r23, 7
- dec r24
- mov r25, r24
- clr r20
- sbrc r19, 7
- dec r20
- mov r21, r20
- XJMP __mulsi3
-ENDF __mulhisi3
-#endif /* defined (L_mulhisi3) */
-
#if defined (L_umulhisi3)
DEFUN __umulhisi3
-;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- clr r25
- mov_l r20, r24
- mov_h r21, r25
+ wmov B0, 24 ; B1:B0 = R25:R24 (wmov is defined outside this view; used as a 2-byte register move)
+ ;; Zero-extend B
+ clr B2
+ clr B3
+ ;; Zero-extend A: A1:A0 (R23:R22) is used as passed in, only the upper word is set
+ wmov A2, B2 ; A3:A2 = B3:B2 = 0
XJMP __mulsi3
ENDF __umulhisi3
-#endif /* defined (L_umulhisi3) */
+#endif /* L_umulhisi3 */
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+ wmov B0, 24 ; B1:B0 = R25:R24 (wmov is defined outside this view; used as a 2-byte register move)
+ ;; Sign-extend B
+ lsl r25 ; Carry = sign bit of B; R25 is free now (copied above) and is rewritten as A3 below
+ sbc B2, B2 ; B2 = 0 - Carry: 0xff if B < 0, else 0x00
+ mov B3, B2
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Sign-extend A
+ ;; NOTE(review): presumably used because the SBRS below would skip over the XJMP -- confirm
+ clr A2
+ sbrc A1, 7 ; skip the COM if A >= 0
+ com A2 ; A < 0: A2 = 0xff
+ mov A3, A2
+ XJMP __mulsi3
+#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
+ ;; Zero-extend A and __mulsi3 will run at least twice as fast
+ ;; compared to a sign-extended A.
+ clr A2
+ clr A3
+ sbrs A1, 7 ; A < 0: skip the XJMP and take the corrected path
+ XJMP __mulsi3 ; A >= 0: plain multiplication
+ ;; If A < 0 then perform the B * 0xffff.... before the
+ ;; very multiplication by initializing the high part of the
+ ;; result CC with -B.
+ wmov CC2, A2 ; CC3:CC2 = A3:A2 = 0
+ sub CC2, B0 ; CC3:CC2 = -(B1:B0)
+ sbc CC3, B1
+ XJMP __mulsi3_helper ; enter after __mulsi3's clearing of CC3:CC2 so the preset value is kept
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
-#if defined (L_mulsi3)
/*******************************************************
Multiplication 32 x 32 without MUL
*******************************************************/
-#define r_arg1L r22 /* multiplier Low */
-#define r_arg1H r23
-#define r_arg1HL r24
-#define r_arg1HH r25 /* multiplier High */
-
-#define r_arg2L r18 /* multiplicand Low */
-#define r_arg2H r19
-#define r_arg2HL r20
-#define r_arg2HH r21 /* multiplicand High */
-
-#define r_resL r26 /* result Low */
-#define r_resH r27
-#define r_resHL r30
-#define r_resHH r31 /* result High */
+#if defined (L_mulsi3)
DEFUN __mulsi3
- clr r_resHH ; clear result
- clr r_resHL ; clear result
- clr r_resH ; clear result
- clr r_resL ; clear result
-__mulsi3_loop:
- sbrs r_arg1L,0
- rjmp __mulsi3_skip1
- add r_resL,r_arg2L ; result + multiplicand
- adc r_resH,r_arg2H
- adc r_resHL,r_arg2HL
- adc r_resHH,r_arg2HH
-__mulsi3_skip1:
- add r_arg2L,r_arg2L ; shift multiplicand
- adc r_arg2H,r_arg2H
- adc r_arg2HL,r_arg2HL
- adc r_arg2HH,r_arg2HH
-
- lsr r_arg1HH ; gets LSB of multiplier
- ror r_arg1HL
- ror r_arg1H
- ror r_arg1L
- brne __mulsi3_loop
- sbiw r_arg1HL,0
- cpc r_arg1H,r_arg1L
- brne __mulsi3_loop ; exit if multiplier = 0
-__mulsi3_exit:
- mov_h r_arg1HH,r_resHH ; result to return register
- mov_l r_arg1HL,r_resHL
- mov_h r_arg1H,r_resH
- mov_l r_arg1L,r_resL
- ret
-ENDF __mulsi3
+ ;; Clear the high word of the result; the low word is cleared by __mulsi3_helper
+ clr CC2
+ clr CC3
+ ;; FALLTHRU into __mulsi3_helper, which must follow immediately
+ENDF __mulsi3
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg1HL
-#undef r_arg1HH
-
-#undef r_arg2L
-#undef r_arg2H
-#undef r_arg2HL
-#undef r_arg2HH
-
-#undef r_resL
-#undef r_resH
-#undef r_resHL
-#undef r_resHH
+DEFUN __mulsi3_helper
+ clr CC0 ; only the low word of CC is cleared here: CC3:CC2 is provided by the caller
+ clr CC1
+ rjmp 3f ; enter the loop at the shift of A
+
+1: ;; If bit n of A is set, then add B * 2^n to the result in CC
+ ;; CC += B ($ separates several instructions on one line)
+ add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
+
+2: ;; B <<= 1
+ lsl B0 $ rol B1 $ rol B2 $ rol B3
+
+3: ;; A >>= 1: Carry = n-th bit of A
+ lsr A3 $ ror A2 $ ror A1 $ ror A0
+
+ brcs 1b
+ ;; Only continue if A != 0. Carry is 0 here and Z reflects A0 from the last ROR.
+ sbci A1, 0 ; A1 unchanged (Carry = 0); Z stays set only if A1 == 0 and A0 == 0
+ brne 2b
+ sbiw A2, 0 ; subtracting 0 leaves A unchanged and sets Z iff A3:A2 == 0
+ brne 2b
+
+ ;; All bits of A are consumed: Copy result to return register C
+ wmov C0, CC0
+ wmov C2, CC2
+ ret
+ENDF __mulsi3_helper
+#endif /* L_mulsi3 */
-#endif /* defined (L_mulsi3) */
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -316,7 +416,7 @@ ENDF __mulsi3
#define C3 C0+3
/*******************************************************
- Widening Multiplication 32 = 16 x 16
+ Widening Multiplication 32 = 16 x 16 with MUL
*******************************************************/
#if defined (L_mulhisi3)
@@ -364,7 +464,17 @@ DEFUN __umulhisi3
mul A1, B1
movw C2, r0
mul A0, B1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; This function is used by many other routines, often multiple times.
+ ;; Therefore, if the flash size is not too limited, avoid the RCALL
+ ;; and invest 6 Bytes to speed things up.
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+#else
rcall 1f
+#endif
mul A1, B0
1: add C1, r0
adc C2, r1
@@ -375,7 +485,7 @@ ENDF __umulhisi3
#endif /* L_umulhisi3 */
/*******************************************************
- Widening Multiplication 32 = 16 x 32
+ Widening Multiplication 32 = 16 x 32 with MUL
*******************************************************/
#if defined (L_mulshisi3)
@@ -425,7 +535,7 @@ ENDF __muluhisi3
#endif /* L_muluhisi3 */
/*******************************************************
- Multiplication 32 x 32
+ Multiplication 32 x 32 with MUL
*******************************************************/
#if defined (L_mulsi3)
@@ -468,7 +578,7 @@ ENDF __mulsi3
#endif /* __AVR_HAVE_MUL__ */
/*******************************************************
- Multiplication 24 x 24
+ Multiplication 24 x 24 with MUL
*******************************************************/
#if defined (L_mulpsi3)
@@ -1247,6 +1357,19 @@ __divmodsi4_exit:
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */
+#undef r_remHH
+#undef r_remHL
+#undef r_remH
+#undef r_remL
+#undef r_arg1HH
+#undef r_arg1HL
+#undef r_arg1H
+#undef r_arg1L
+#undef r_arg2HH
+#undef r_arg2HL
+#undef r_arg2H
+#undef r_arg2L
+#undef r_cnt
/*******************************************************
Division 64 / 64
@@ -2757,9 +2880,7 @@ DEFUN __fmulsu_exit
XJMP __fmul
1: XCALL __fmul
;; C = -C iff A0.7 = 1
- com C1
- neg C0
- sbci C1, -1
+ NEG2 C0
ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
@@ -2794,3 +2915,5 @@ ENDF __fmul
#undef B1
#undef C0
#undef C1
+
+#include "lib1funcs-fixed.S"