author     gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4>    2012-08-24 12:42:48 +0000
committer  gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4>    2012-08-24 12:42:48 +0000
commit     017c5b989ac3e3c65e0af360accad2f3ef281fad (patch)
tree       9180eac44e1ace2c0794f565cff49b3847b05c2b /libgcc
parent     5d34a30f668dc6540591e50a58fdddd143842f62 (diff)
libgcc/
	PR target/54222
	* config/avr/lib1funcs-fixed.S: New file.
	* config/avr/lib1funcs.S: Include it.  Undefine some divmodsi
	after they are used.
	(neg2, neg4): New macros.
	(__mulqihi3, __umulqihi3, __mulhi3): Rewrite non-MUL variants.
	(__mulhisi3, __umulhisi3, __mulsi3): Rewrite non-MUL variants.
	(__umulhisi3): Speed up MUL variant if there is enough flash.
	* config/avr/avr-lib.h (TA, UTA): Adjust according to gcc's
	avr-modes.def.
	* config/avr/t-avr (LIB1ASMFUNCS): Add: _fractqqsf, _fractuqqsf,
	_fracthqsf, _fractuhqsf, _fracthasf, _fractuhasf, _fractsasf,
	_fractusasf, _fractsfqq, _fractsfuqq, _fractsfhq, _fractsfuhq,
	_fractsfha, _fractsfsa, _mulqq3, _muluqq3, _mulhq3, _muluhq3,
	_mulha3, _muluha3, _mulsa3, _mulusa3, _divqq3, _udivuqq3, _divhq3,
	_udivuhq3, _divha3, _udivuha3, _divsa3, _udivusa3.
	(LIB2FUNCS_EXCLUDE): Add supported functions.

gcc/
	PR target/54222
	* avr-modes.def (HA, SA, DA, TA, UTA): Adjust modes.
	* avr/avr-fixed.md: New file.
	* avr/avr.md: Include it.
	(cc): Add: minus.
	(adjust_len): Add: minus, minus64, ufract, sfract.
	(ALL1, ALL2, ALL4, ORDERED234): New mode iterators.
	(MOVMODE): Add: QQ, UQQ, HQ, UHQ, HA, UHA, SQ, USQ, SA, USA.
	(MPUSH): Add: HQ, UHQ, HA, UHA, SQ, USQ, SA, USA.
	(pushqi1, xload8_A, xload_8, movqi_insn, *reload_inqi, addqi3,
	subqi3, ashlqi3, *ashlqi3, ashrqi3, lshrqi3, *lshrqi3, *cmpqi,
	cbranchqi4, *cpse.eq): Generalize to handle all 8-bit modes
	in ALL1.
	(*movhi, reload_inhi, addhi3, *addhi3, addhi3_clobber, subhi3,
	ashlhi3, *ashlhi3_const, ashrhi3, *ashirhi3_const, lshrhi3,
	*lshrhi3_const, *cmphi, cbranchhi4): Generalize to handle all
	16-bit modes in ALL2.
	(subhi3, casesi, strlenhi): Add clobber when expanding minus:HI.
	(*movsi, *reload_insi, addsi3, subsi3, ashlsi3, *ashlsi3_const,
	ashrsi3, *ashrhi3_const, *ashrsi3_const, lshrsi3, *lshrsi3_const,
	*reversed_tstsi, *cmpsi, cbranchsi4): Generalize to handle all
	32-bit modes in ALL4.
	* avr-dimode.md (ALL8): New mode iterator.
	(adddi3, adddi3_insn, adddi3_const_insn, subdi3, subdi3_insn,
	subdi3_const_insn, cbranchdi4, compare_di2, compare_const_di2,
	ashrdi3, lshrdi3, rotldi3, ashldi3_insn, ashrdi3_insn,
	lshrdi3_insn, rotldi3_insn): Generalize to handle all 64-bit
	modes in ALL8.
	* config/avr/avr-protos.h (avr_to_int_mode): New prototype.
	(avr_out_fract, avr_out_minus, avr_out_minus64): New prototypes.
	* config/avr/avr.c (TARGET_FIXED_POINT_SUPPORTED_P): Define to...
	(avr_fixed_point_supported_p): ...this new static function.
	(TARGET_BUILD_BUILTIN_VA_LIST): Define to...
	(avr_build_builtin_va_list): ...this new static function.
	(avr_adjust_type_node): New static function.
	(avr_scalar_mode_supported_p): Allow if ALL_FIXED_POINT_MODE_P.
	(avr_builtin_setjmp_frame_value): Use gen_subhi3 and return new
	pseudo instead of gen_rtx_MINUS.
	(avr_print_operand, avr_operand_rtx_cost): Handle: CONST_FIXED.
	(notice_update_cc): Handle: CC_MINUS.
	(output_movqi): Generalize to handle respective fixed-point modes.
	(output_movhi, output_movsisf, avr_2word_insn_p): Ditto.
	(avr_out_compare, avr_out_plus_1): Also handle fixed-point modes.
	(avr_assemble_integer): Ditto.
	(output_reload_in_const, output_reload_insisf): Ditto.
	(avr_compare_pattern): Skip all modes > 4 bytes.
	(avr_2word_insn_p): Skip movuqq_insn, movqq_insn.
	(avr_out_fract, avr_out_minus, avr_out_minus64): New functions.
	(avr_to_int_mode): New function.
	(adjust_insn_length): Handle: ADJUST_LEN_SFRACT,
	ADJUST_LEN_UFRACT, ADJUST_LEN_MINUS, ADJUST_LEN_MINUS64.
	* config/avr/predicates.md (const0_operand): Allow const_fixed.
	(const_operand, const_or_immediate_operand): New.
	(nonmemory_or_const_operand): New.
	* config/avr/constraints.md (Ynn, Y00, Y01, Y02, Ym1, Ym2, YIJ):
	New constraints.
	* config/avr/avr.h (LONG_LONG_ACCUM_TYPE_SIZE): Define.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@190644 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgcc')
-rw-r--r--  libgcc/ChangeLog                     |  20
-rw-r--r--  libgcc/config/avr/avr-lib.h          |  76
-rw-r--r--  libgcc/config/avr/lib1funcs-fixed.S  | 874
-rw-r--r--  libgcc/config/avr/lib1funcs.S        | 423
-rw-r--r--  libgcc/config/avr/t-avr              |  65
5 files changed, 1308 insertions, 150 deletions
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 33ad5185479..60f19491d76 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,23 @@
+2012-08-24 Georg-Johann Lay <avr@gjlay.de>
+
+ PR target/54222
+ * config/avr/lib1funcs-fixed.S: New file.
+ * config/avr/lib1funcs.S: Include it. Undefine some divmodsi
+ after they are used.
+ (neg2, neg4): New macros.
+ (__mulqihi3,__umulqihi3,__mulhi3): Rewrite non-MUL variants.
+ (__mulhisi3,__umulhisi3,__mulsi3): Rewrite non-MUL variants.
+ (__umulhisi3): Speed up MUL variant if there is enough flash.
+ * config/avr/avr-lib.h (TA, UTA): Adjust according to gcc's
+ avr-modes.def.
+ * config/avr/t-avr (LIB1ASMFUNCS): Add: _fractqqsf, _fractuqqsf,
+ _fracthqsf, _fractuhqsf, _fracthasf, _fractuhasf, _fractsasf,
+ _fractusasf, _fractsfqq, _fractsfuqq, _fractsfhq, _fractsfuhq,
+ _fractsfha, _fractsfsa, _mulqq3, _muluqq3, _mulhq3, _muluhq3,
+ _mulha3, _muluha3, _mulsa3, _mulusa3, _divqq3, _udivuqq3, _divhq3,
+ _udivuhq3, _divha3, _udivuha3, _divsa3, _udivusa3.
+ (LIB2FUNCS_EXCLUDE): Add supported functions.
+
2012-08-22 Georg-Johann Lay <avr@gjlay.de>
* Makefile.in (fixed-funcs,fixed-conv-funcs): filter-out
diff --git a/libgcc/config/avr/avr-lib.h b/libgcc/config/avr/avr-lib.h
index daca4d81f9a..66082eb8a48 100644
--- a/libgcc/config/avr/avr-lib.h
+++ b/libgcc/config/avr/avr-lib.h
@@ -4,3 +4,79 @@
#define DI SI
typedef int QItype __attribute__ ((mode (QI)));
#endif
+
+/* fixed-bit.h does not define functions for TA and UTA because
+ that part is wrapped in #if MIN_UNITS_PER_WORD > 4.
+ This would lead to empty functions for TA and UTA.
+ Thus, supply appropriate defines as if HAVE_[U]TA == 1.
+ #define HAVE_[U]TA 1 won't work because avr-modes.def
+ uses ADJUST_BYTESIZE(TA,8) and fixed-bit.h is not generic enough
+ to arrange for such changes of the mode size. */
+
+typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA)));
+
+#if defined (UTA_MODE)
+#define FIXED_SIZE 8 /* in bytes */
+#define INT_C_TYPE UDItype
+#define UINT_C_TYPE UDItype
+#define HINT_C_TYPE USItype
+#define HUINT_C_TYPE USItype
+#define MODE_NAME UTA
+#define MODE_NAME_S uta
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (FROM_UTA)
+#define FROM_TYPE 4 /* ID for fixed-point */
+#define FROM_MODE_NAME UTA
+#define FROM_MODE_NAME_S uta
+#define FROM_INT_C_TYPE UDItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 8 /* in bytes */
+#elif defined (TO_UTA)
+#define TO_TYPE 4 /* ID for fixed-point */
+#define TO_MODE_NAME UTA
+#define TO_MODE_NAME_S uta
+#define TO_INT_C_TYPE UDItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 8 /* in bytes */
+#endif
+
+/* Same for TAmode */
+
+typedef _Fract TAtype __attribute__ ((mode (TA)));
+
+#if defined (TA_MODE)
+#define FIXED_SIZE 8 /* in bytes */
+#define INT_C_TYPE DItype
+#define UINT_C_TYPE UDItype
+#define HINT_C_TYPE SItype
+#define HUINT_C_TYPE USItype
+#define MODE_NAME TA
+#define MODE_NAME_S ta
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (FROM_TA)
+#define FROM_TYPE 4 /* ID for fixed-point */
+#define FROM_MODE_NAME TA
+#define FROM_MODE_NAME_S ta
+#define FROM_INT_C_TYPE DItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 8 /* in bytes */
+#elif defined (TO_TA)
+#define TO_TYPE 4 /* ID for fixed-point */
+#define TO_MODE_NAME TA
+#define TO_MODE_NAME_S ta
+#define TO_INT_C_TYPE DItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 8 /* in bytes */
+#endif
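
[Aside, not part of the patch: the macros above feed libgcc's generic fixed-bit machinery. An accumulator is just an integer with an implied binary point, so same-format add/sub is plain integer arithmetic on INT_C_TYPE. The C sketch below models UTA as an unsigned 16.48 quantity, which is an assumption based on ADJUST_BYTESIZE(TA,8) in avr-modes.def; all helper names are invented.]

    #include <stdint.h>
    #include <stdio.h>

    #define UTA_FBITS 48   /* assumed fractional bit count of the 8-byte UTA mode */

    /* Hypothetical helpers: model an unsigned accumulator as a scaled uint64_t. */
    static uint64_t uta_from_double (double d)
    {
      return (uint64_t) (d * ((uint64_t) 1 << UTA_FBITS));
    }

    static double uta_to_double (uint64_t a)
    {
      return (double) a / (double) ((uint64_t) 1 << UTA_FBITS);
    }

    int main (void)
    {
      uint64_t x = uta_from_double (1.5);
      uint64_t y = uta_from_double (2.25);
      /* Addition needs no rescaling; that is why fixed-bit.c can implement
         _add/_sub with the plain integer types named in the defines above.  */
      printf ("%f\n", uta_to_double (x + y));   /* prints 3.750000 */
      return 0;
    }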
diff --git a/libgcc/config/avr/lib1funcs-fixed.S b/libgcc/config/avr/lib1funcs-fixed.S
new file mode 100644
index 00000000000..c1aff53d5fd
--- /dev/null
+++ b/libgcc/config/avr/lib1funcs-fixed.S
@@ -0,0 +1,874 @@
+/* -*- Mode: Asm -*- */
+;; Copyright (C) 2012
+;; Free Software Foundation, Inc.
+;; Contributed by Sean D'Epagnier (sean@depagnier.com)
+;; Georg-Johann Lay (avr@gjlay.de)
+
+;; This file is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by the
+;; Free Software Foundation; either version 3, or (at your option) any
+;; later version.
+
+;; In addition to the permissions in the GNU General Public License, the
+;; Free Software Foundation gives you unlimited permission to link the
+;; compiled version of this file into combinations with other programs,
+;; and to distribute those combinations without any restriction coming
+;; from the use of this file. (The General Public License restrictions
+;; do apply in other respects; for example, they cover modification of
+;; the file, and distribution when not linked into a combine
+;; executable.)
+
+;; This file is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Fixed point library routines for AVR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.section .text.libgcc.fixed, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions to float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractqqsf)
+DEFUN __fractqqsf
+ ;; Move in place for SA -> SF conversion
+ clr r22
+ mov r23, r24
+ lsl r23
+ ;; Sign-extend
+ sbc r24, r24
+ mov r25, r24
+ XJMP __fractsasf
+ENDF __fractqqsf
+#endif /* L_fractqqsf */
+
+#if defined (L_fractuqqsf)
+DEFUN __fractuqqsf
+ ;; Move in place for USA -> SF conversion
+ clr r22
+ mov r23, r24
+ ;; Zero-extend
+ clr r24
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuqqsf
+#endif /* L_fractuqqsf */
+
+#if defined (L_fracthqsf)
+DEFUN __fracthqsf
+ ;; Move in place for SA -> SF conversion
+ wmov 22, 24
+ lsl r22
+ rol r23
+ ;; Sign-extend
+ sbc r24, r24
+ mov r25, r24
+ XJMP __fractsasf
+ENDF __fracthqsf
+#endif /* L_fracthqsf */
+
+#if defined (L_fractuhqsf)
+DEFUN __fractuhqsf
+ ;; Move in place for USA -> SF conversion
+ wmov 22, 24
+ ;; Zero-extend
+ clr r24
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuhqsf
+#endif /* L_fractuhqsf */
+
+#if defined (L_fracthasf)
+DEFUN __fracthasf
+ ;; Move in place for SA -> SF conversion
+ clr r22
+ mov r23, r24
+ mov r24, r25
+ ;; Sign-extend
+ lsl r25
+ sbc r25, r25
+ XJMP __fractsasf
+ENDF __fracthasf
+#endif /* L_fracthasf */
+
+#if defined (L_fractuhasf)
+DEFUN __fractuhasf
+ ;; Move in place for USA -> SF conversion
+ clr r22
+ mov r23, r24
+ mov r24, r25
+ ;; Zero-extend
+ clr r25
+ XJMP __fractusasf
+ENDF __fractuhasf
+#endif /* L_fractuhasf */
+
+
+#if defined (L_fractsqsf)
+DEFUN __fractsqsf
+ XCALL __floatsisf
+ ;; Divide non-zero results by 2^31 to move the
+ ;; decimal point into place
+ tst r25
+ breq 0f
+ subi r24, exp_lo (31)
+ sbci r25, exp_hi (31)
+0: ret
+ENDF __fractsqsf
+#endif /* L_fractsqsf */
+
+#if defined (L_fractusqsf)
+DEFUN __fractusqsf
+ XCALL __floatunsisf
+ ;; Divide non-zero results by 2^32 to move the
+ ;; decimal point into place
+ cpse r25, __zero_reg__
+ subi r25, exp_hi (32)
+ ret
+ENDF __fractusqsf
+#endif /* L_fractusqsf */
+
+#if defined (L_fractsasf)
+DEFUN __fractsasf
+ XCALL __floatsisf
+ ;; Divide non-zero results by 2^16 to move the
+ ;; decimal point into place
+ cpse r25, __zero_reg__
+ subi r25, exp_hi (16)
+ ret
+ENDF __fractsasf
+#endif /* L_fractsasf */
+
+#if defined (L_fractusasf)
+DEFUN __fractusasf
+ XCALL __floatunsisf
+ ;; Divide non-zero results by 2^16 to move the
+ ;; decimal point into place
+ cpse r25, __zero_reg__
+ subi r25, exp_hi (16)
+ ret
+ENDF __fractusasf
+#endif /* L_fractusasf */
+
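[Aside: the conversions above all funnel into __floatsisf/__floatunsisf and rescale afterwards, since a fixed-point number with f fractional bits is its integer bit pattern divided by 2^f; the division is done by patching the float's exponent via exp_lo()/exp_hi(). A minimal C model of the same idea, with HQ taken to be the s.15 format and an invented function name:]

    #include <stdint.h>
    #include <stdio.h>
    #include <math.h>

    static float fract_hq_to_float (int16_t bits)  /* HQ: 1 sign + 15 fraction bits */
    {
      return ldexpf ((float) bits, -15);  /* convert as integer, scale by 2^-15 */
    }

    int main (void)
    {
      printf ("%f\n", fract_hq_to_float (INT16_MIN)); /* -1.000000 */
      printf ("%f\n", fract_hq_to_float (0x4000));    /*  0.500000 */
      return 0;
    }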
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions from float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractsfqq)
+DEFUN __fractsfqq
+ ;; Multiply with 2^{24+7} to get a QQ result in r25
+ subi r24, exp_lo (-31)
+ sbci r25, exp_hi (-31)
+ XCALL __fixsfsi
+ mov r24, r25
+ ret
+ENDF __fractsfqq
+#endif /* L_fractsfqq */
+
+#if defined (L_fractsfuqq)
+DEFUN __fractsfuqq
+ ;; Multiply with 2^{24+8} to get a UQQ result in r25
+ subi r25, exp_hi (-32)
+ XCALL __fixunssfsi
+ mov r24, r25
+ ret
+ENDF __fractsfuqq
+#endif /* L_fractsfuqq */
+
+#if defined (L_fractsfha)
+DEFUN __fractsfha
+ ;; Multiply with 2^24 to get a HA result in r25:r24
+ subi r25, exp_hi (-24)
+ XJMP __fixsfsi
+ENDF __fractsfha
+#endif /* L_fractsfha */
+
+#if defined (L_fractsfuha)
+DEFUN __fractsfuha
+ ;; Multiply with 2^24 to get a UHA result in r25:r24
+ subi r25, exp_hi (-24)
+ XJMP __fixunssfsi
+ENDF __fractsfuha
+#endif /* L_fractsfuha */
+
+#if defined (L_fractsfhq)
+DEFUN __fractsfsq
+ENDF __fractsfsq
+
+DEFUN __fractsfhq
+ ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
+ ;; resp. with 2^31 to get a SQ result in r25:r22
+ subi r24, exp_lo (-31)
+ sbci r25, exp_hi (-31)
+ XJMP __fixsfsi
+ENDF __fractsfhq
+#endif /* L_fractsfhq */
+
+#if defined (L_fractsfuhq)
+DEFUN __fractsfusq
+ENDF __fractsfusq
+
+DEFUN __fractsfuhq
+ ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
+ ;; resp. with 2^32 to get a USQ result in r25:r22
+ subi r25, exp_hi (-32)
+ XJMP __fixunssfsi
+ENDF __fractsfuhq
+#endif /* L_fractsfuhq */
+
+#if defined (L_fractsfsa)
+DEFUN __fractsfsa
+ ;; Multiply with 2^16 to get a SA result in r25:r22
+ subi r25, exp_hi (-16)
+ XJMP __fixsfsi
+ENDF __fractsfsa
+#endif /* L_fractsfsa */
+
+#if defined (L_fractsfusa)
+DEFUN __fractsfusa
+ ;; Multiply with 2^16 to get a USA result in r25:r22
+ subi r25, exp_hi (-16)
+ XJMP __fixunssfsi
+ENDF __fractsfusa
+#endif /* L_fractsfusa */
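
[Aside: going the other way, the routines above bump the exponent by f before calling __fixsfsi/__fixunssfsi, so the truncation to integer happens exactly at the target format's binary point. A C sketch of the same arithmetic; saturation on overflow is deliberately left out and the name is invented:]

    #include <stdint.h>
    #include <stdio.h>
    #include <math.h>

    static int16_t float_to_fract_hq (float x)
    {
      return (int16_t) ldexpf (x, 15);  /* multiply by 2^15, then truncate */
    }

    int main (void)
    {
      printf ("%d\n", float_to_fract_hq (0.5f));   /* 16384, i.e. 0x4000 */
      return 0;
    }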
+
+
+;; For multiplication the functions here are called directly from
+;; avr-fixed.md instead of using the standard libcall mechanisms.
+;; This can make better code because GCC knows exactly which
+;; of the call-used registers (not all of them) are clobbered.
+
+/*******************************************************
+ Fractional Multiplication 8 x 8 without MUL
+*******************************************************/
+
+#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
+;;; R23 = R24 * R25
+;;; Clobbers: __tmp_reg__, R22, R24, R25
+;;; Rounding: ???
+DEFUN __mulqq3
+ XCALL __fmuls
+ ;; TR 18037 requires that (-1) * (-1) does not overflow
+ ;; The only input that can produce -1 is (-1)^2.
+ dec r23
+ brvs 0f
+ inc r23
+0: ret
+ENDF __mulqq3
+#endif /* L_mulqq3 && ! HAVE_MUL */
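
[Aside: the V-flag test above exists only for the 0x80 * 0x80 corner case. Functionally the routine computes the following, shown as a C sketch with invented names:]

    #include <stdint.h>
    #include <stdio.h>

    static int8_t mulqq3_model (int8_t a, int8_t b)   /* QQ: s.7 format */
    {
      if (a == INT8_MIN && b == INT8_MIN)
        return INT8_MAX;              /* (-1) * (-1) saturates just below 1 */
      return (int8_t) (((int16_t) a * b) >> 7);
    }

    int main (void)
    {
      printf ("%d\n", mulqq3_model (-128, -128)); /* 127, not -128 */
      printf ("%d\n", mulqq3_model (64, 64));     /* 0.5 * 0.5 = 0.25 -> 32 */
      return 0;
    }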
+
+/*******************************************************
+ Fractional Multiply .16 x .16 with and without MUL
+*******************************************************/
+
+#if defined (L_mulhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulhq3
+ XCALL __mulhisi3
+ ;; Shift result into place
+ lsl r23
+ rol r24
+ rol r25
+ brvs 1f
+ ;; Round
+ sbrc r23, 7
+ adiw r24, 1
+ ret
+1: ;; Overflow. TR 18037 requires (-1)^2 not to overflow
+ ldi r24, lo8 (0x7fff)
+ ldi r25, hi8 (0x7fff)
+ ret
+ENDF __mulhq3
+#endif /* defined (L_mulhq3) */
+
+#if defined (L_muluhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB < error <= 0.5 LSB
+DEFUN __muluhq3
+ XCALL __umulhisi3
+ ;; Round
+ sbrc r23, 7
+ adiw r24, 1
+ ret
+ENDF __muluhq3
+#endif /* L_muluhq3 */
+
+
+/*******************************************************
+ Fixed Multiply 8.8 x 8.8 with and without MUL
+*******************************************************/
+
+#if defined (L_mulha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulha3
+ XCALL __mulhisi3
+ XJMP __muluha3_round
+ENDF __mulha3
+#endif /* L_mulha3 */
+
+#if defined (L_muluha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;; Clobbers: ABI, called by optabs
+;;; MUL: (R25:R24) = (R19:R18) * (R27:R26)
+;;; Clobbers: __tmp_reg__, R22, R23
+;;; Rounding: -0.5 LSB < error <= 0.5 LSB
+DEFUN __muluha3
+ XCALL __umulhisi3
+ XJMP __muluha3_round
+ENDF __muluha3
+#endif /* L_muluha3 */
+
+#if defined (L_muluha3_round)
+DEFUN __muluha3_round
+ ;; Shift result into place
+ mov r25, r24
+ mov r24, r23
+ ;; Round
+ sbrc r22, 7
+ adiw r24, 1
+ ret
+ENDF __muluha3_round
+#endif /* L_muluha3_round */
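
[Aside: the shared tail rounds by testing the top bit of the first discarded byte, i.e. the usual add-half-then-truncate rounding. For the unsigned 8.8 case the arithmetic is, as a C sketch with an invented name:]

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t muluha3_model (uint16_t a, uint16_t b)  /* UHA: 8.8 format */
    {
      uint32_t p = (uint32_t) a * b;  /* exact 16.16 intermediate product */
      p += 1u << 7;                   /* add half of the weight that gets dropped */
      return (uint16_t) (p >> 8);     /* discard the 8 guard bits */
    }

    int main (void)
    {
      /* 1.5 * 2.5 = 3.75 -> 0x03c0 */
      printf ("0x%04x\n", muluha3_model (0x0180, 0x0280));
      return 0;
    }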
+
+
+/*******************************************************
+ Fixed Multiplication 16.16 x 16.16
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Multiplier
+#define A0 16
+#define A1 A0+1
+#define A2 A1+1
+#define A3 A2+1
+
+;; Multiplicand
+#define B0 20
+#define B1 B0+1
+#define B2 B1+1
+#define B3 B2+1
+
+;; Result
+#define C0 24
+#define C1 C0+1
+#define C2 C1+1
+#define C3 C2+1
+
+#if defined (L_mulusa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__
+;;; Rounding: -0.5 LSB < error <= 0.5 LSB
+DEFUN __mulusa3
+ ;; Some of the MUL instructions have LSBs outside the result.
+ ;; Don't ignore these LSBs in order to tame rounding error.
+ ;; Use C2/C3 for these LSBs.
+
+ clr C0
+ clr C1
+ mul A0, B0 $ movw C2, r0
+
+ mul A1, B0 $ add C3, r0 $ adc C0, r1
+ mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
+
+ ;; Round
+ sbrc C3, 7
+ adiw C0, 1
+
+ ;; The following MULs don't have LSBs outside the result.
+ ;; C2/C3 is the high part.
+
+ mul A0, B2 $ add C0, r0 $ adc C1, r1 $ sbc C2, C2
+ mul A1, B1 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
+ mul A2, B0 $ add C0, r0 $ adc C1, r1 $ sbci C2, 0
+ neg C2
+
+ mul A0, B3 $ add C1, r0 $ adc C2, r1 $ sbc C3, C3
+ mul A1, B2 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ mul A2, B1 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ mul A3, B0 $ add C1, r0 $ adc C2, r1 $ sbci C3, 0
+ neg C3
+
+ mul A1, B3 $ add C2, r0 $ adc C3, r1
+ mul A2, B2 $ add C2, r0 $ adc C3, r1
+ mul A3, B1 $ add C2, r0 $ adc C3, r1
+
+ mul A2, B3 $ add C3, r0
+ mul A3, B2 $ add C3, r0
+
+ clr __zero_reg__
+ ret
+ENDF __mulusa3
+#endif /* L_mulusa3 */
+
+#if defined (L_mulsa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__
+;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
+DEFUN __mulsa3
+ XCALL __mulusa3
+ tst B3
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: sbrs A3, 7
+ ret
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
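
[Aside: the fix-up in __mulsa3 works because a two's-complement operand read as unsigned is off by exactly 2^32 when negative, so only the high half of the 64-bit product needs a correction term. The argument rendered as a C sketch; the helper name is invented, SA is assumed to carry 16 fractional bits per this patch's avr-modes.def adjustment, and this is not the libgcc entry point:]

    #include <stdint.h>
    #include <stdio.h>

    static int32_t mulsa3_model (int32_t a, int32_t b)  /* binary point between the halves */
    {
      uint64_t p = (uint64_t) (uint32_t) a * (uint32_t) b;
      uint32_t hi = (uint32_t) (p >> 32);
      uint32_t lo = (uint32_t) p;
      if (a < 0) hi -= (uint32_t) b;  /* the "sub C2,B0 / sbc C3,B1" correction */
      if (b < 0) hi -= (uint32_t) a;
      /* The 16.16 result is the middle 32 bits of the corrected product. */
      return (int32_t) ((hi << 16) | (lo >> 16));
    }

    int main (void)
    {
      /* -1.5 * 2.0 = -3.0 -> 0xfffd0000 */
      printf ("0x%08lx\n",
              (unsigned long) (uint32_t) mulsa3_model ((int32_t) 0xFFFE8000, 0x00020000));
      return 0;
    }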
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#else /* __AVR_HAVE_MUL__ */
+
+#define A0 18
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 22
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+;; __tmp_reg__
+#define CC0 0
+;; __zero_reg__
+#define CC1 1
+#define CC2 16
+#define CC3 17
+
+#define AA0 26
+#define AA1 AA0+1
+#define AA2 30
+#define AA3 AA2+1
+
+#if defined (L_mulsa3)
+;;; (R25:R22) *= (R21:R18)
+;;; Clobbers: ABI, called by optabs
+;;; Rounding: -1 LSB <= error <= 1 LSB
+DEFUN __mulsa3
+ push B0
+ push B1
+ bst B3, 7
+ XCALL __mulusa3
+ ;; A survived in 31:30:27:26
+ rcall 1f
+ pop AA1
+ pop AA0
+ bst AA3, 7
+1: brtc 9f
+ ;; 1-extend A/B
+ sub C2, AA0
+ sbc C3, AA1
+9: ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
+
+#if defined (L_mulusa3)
+;;; (R25:R22) *= (R21:R18)
+;;; Clobbers: ABI, called by optabs and __mulsa3
+;;; Rounding: -1 LSB <= error <= 1 LSB
+;;; Does not clobber T and A[] survives in 26, 27, 30, 31
+DEFUN __mulusa3
+ push CC2
+ push CC3
+ ; clear result
+ clr __tmp_reg__
+ wmov CC2, CC0
+ ; save multiplicand
+ wmov AA0, A0
+ wmov AA2, A2
+ rjmp 3f
+
+ ;; Loop the integral part
+
+1: ;; CC += A * 2^n; n >= 0
+ add CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
+
+2: ;; A <<= 1
+ lsl A0 $ rol A1 $ rol A2 $ rol A3
+
+3: ;; IBIT(B) >>= 1
+ ;; Carry = n-th bit of B; n >= 0
+ lsr B3
+ ror B2
+ brcs 1b
+ sbci B3, 0
+ brne 2b
+
+ ;; Loop the fractional part
+ ;; B2/B3 is 0 now, use as guard bits for rounding
+ ;; Restore multiplicand
+ wmov A0, AA0
+ wmov A2, AA2
+ rjmp 5f
+
+4: ;; CC += A:Guard * 2^n; n < 0
+ add B3,B2 $ adc CC0,A0 $ adc CC1,A1 $ adc CC2,A2 $ adc CC3,A3
+5:
+ ;; A:Guard >>= 1
+ lsr A3 $ ror A2 $ ror A1 $ ror A0 $ ror B2
+
+ ;; FBIT(B) <<= 1
+ ;; Carry = n-th bit of B; n < 0
+ lsl B0
+ rol B1
+ brcs 4b
+ sbci B0, 0
+ brne 5b
+
+ ;; Move result into place and round
+ lsl B3
+ wmov C2, CC2
+ wmov C0, CC0
+ clr __zero_reg__
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+
+ ;; Epilogue
+ pop CC3
+ pop CC2
+ ret
+ENDF __mulusa3
+#endif /* L_mulusa3 */
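
[Aside: without MUL the routine consumes B bit-wise in two loops: integral bits of B add a left-shifted copy of A, fractional bits add a right-shifted copy, and one extra byte of the shifted-out bits is kept as a rounding guard. Ignoring the guard/rounding detail, the value computed is the following, as a C sketch with USA taken to be the 16.16 format:]

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t mulusa3_model (uint32_t a, uint32_t b)  /* 16.16 x 16.16 */
    {
      uint32_t acc = 0;
      for (int n = 0; n < 16; n++)          /* integral bits of B: weight 2^n */
        if (b & ((uint32_t) 1 << (16 + n)))
          acc += a << n;
      for (int n = 1; n <= 16; n++)         /* fractional bits of B: weight 2^-n */
        if (b & ((uint32_t) 1 << (16 - n)))
          acc += a >> n;                    /* the asm keeps a guard byte here */
      return acc;
    }

    int main (void)
    {
      /* 1.5 * 2.5 = 3.75 -> 0x0003c000 in 16.16 */
      printf ("0x%08lx\n", (unsigned long) mulusa3_model (0x00018000, 0x00028000));
      return 0;
    }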
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+/*******************************************************
+ Fractional Division 8 / 8
+*******************************************************/
+
+#define r_divd r25 /* dividend */
+#define r_quo r24 /* quotient */
+#define r_div r22 /* divisor */
+
+#if defined (L_divqq3)
+DEFUN __divqq3
+ mov r0, r_divd
+ eor r0, r_div
+ sbrc r_div, 7
+ neg r_div
+ sbrc r_divd, 7
+ neg r_divd
+ cp r_divd, r_div
+ breq __divqq3_minus1 ; if equal return -1
+ XCALL __udivuqq3
+ lsr r_quo
+ sbrc r0, 7 ; negate result if needed
+ neg r_quo
+ ret
+__divqq3_minus1:
+ ldi r_quo, 0x80
+ ret
+ENDF __divqq3
+#endif /* defined (L_divqq3) */
+
+#if defined (L_udivuqq3)
+DEFUN __udivuqq3
+ clr r_quo ; clear quotient
+ inc __zero_reg__ ; init loop counter, used per shift
+__udivuqq3_loop:
+ lsl r_divd ; shift dividend
+ brcs 0f ; dividend overflow
+ cp r_divd,r_div ; compare dividend & divisor
+ brcc 0f ; dividend >= divisor
+ rol r_quo ; shift quotient (with CARRY)
+ rjmp __udivuqq3_cont
+0:
+ sub r_divd,r_div ; restore dividend
+ lsl r_quo ; shift quotient (without CARRY)
+__udivuqq3_cont:
+ lsl __zero_reg__ ; shift loop-counter bit
+ brne __udivuqq3_loop
+ com r_quo ; complement result
+ ; because C flag was complemented in loop
+ ret
+ENDF __udivuqq3
+#endif /* defined (L_udivuqq3) */
+
+#undef r_divd
+#undef r_quo
+#undef r_div
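
[Aside: the loop above is plain restoring division, with the dividend shifted left each step because the quotient is a pure fraction; the caller arranges that dividend < divisor, so the quotient fits. A C model with an invented name; the carry/complement tricks of the asm are replaced by straightforward logic:]

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t udivuqq3_model (uint8_t dividend, uint8_t divisor)
    {
      uint8_t q = 0;
      for (int i = 0; i < 8; i++)
        {
          uint16_t d2 = (uint16_t) dividend << 1;   /* shift dividend */
          q <<= 1;
          if (d2 >= divisor)      /* also covers the carry-out "overflow" case */
            {
              d2 -= divisor;      /* subtract: this quotient bit is 1 */
              q |= 1;
            }
          dividend = (uint8_t) d2;
        }
      return q;
    }

    int main (void)
    {
      /* 0.25 / 0.5 = 0.5: 0x40 / 0x80 -> 0x80 */
      printf ("0x%02x\n", udivuqq3_model (0x40, 0x80));
      return 0;
    }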
+
+
+/*******************************************************
+ Fractional Division 16 / 16
+*******************************************************/
+#define r_divdL 26 /* dividend Low */
+#define r_divdH 27 /* dividend High */
+#define r_quoL 24 /* quotient Low */
+#define r_quoH 25 /* quotient High */
+#define r_divL 22 /* divisor Low */
+#define r_divH 23 /* divisor High */
+#define r_cnt 21
+
+#if defined (L_divhq3)
+DEFUN __divhq3
+ mov r0, r_divdH
+ eor r0, r_divH
+ sbrs r_divH, 7
+ rjmp 1f
+ NEG2 r_divL
+1:
+ sbrs r_divdH, 7
+ rjmp 2f
+ NEG2 r_divdL
+2:
+ cp r_divdL, r_divL
+ cpc r_divdH, r_divH
+ breq __divhq3_minus1 ; if equal return -1
+ XCALL __udivuhq3
+ lsr r_quoH
+ ror r_quoL
+ brpl 9f
+ ;; negate result if needed
+ NEG2 r_quoL
+9:
+ ret
+__divhq3_minus1:
+ ldi r_quoH, 0x80
+ clr r_quoL
+ ret
+ENDF __divhq3
+#endif /* defined (L_divhq3) */
+
+#if defined (L_udivuhq3)
+DEFUN __udivuhq3
+ sub r_quoH,r_quoH ; clear quotient and carry
+ ;; FALLTHRU
+ENDF __udivuhq3
+
+DEFUN __udivuha3_common
+ clr r_quoL ; clear quotient
+ ldi r_cnt,16 ; init loop counter
+__udivuhq3_loop:
+ rol r_divdL ; shift dividend (with CARRY)
+ rol r_divdH
+ brcs __udivuhq3_ep ; dividend overflow
+ cp r_divdL,r_divL ; compare dividend & divisor
+ cpc r_divdH,r_divH
+ brcc __udivuhq3_ep ; dividend >= divisor
+ rol r_quoL ; shift quotient (with CARRY)
+ rjmp __udivuhq3_cont
+__udivuhq3_ep:
+ sub r_divdL,r_divL ; restore dividend
+ sbc r_divdH,r_divH
+ lsl r_quoL ; shift quotient (without CARRY)
+__udivuhq3_cont:
+ rol r_quoH ; shift quotient
+ dec r_cnt ; decrement loop counter
+ brne __udivuhq3_loop
+ com r_quoL ; complement result
+ com r_quoH ; because C flag was complemented in loop
+ ret
+ENDF __udivuha3_common
+#endif /* defined (L_udivuhq3) */
+
+/*******************************************************
+ Fixed Division 8.8 / 8.8
+*******************************************************/
+#if defined (L_divha3)
+DEFUN __divha3
+ mov r0, r_divdH
+ eor r0, r_divH
+ sbrs r_divH, 7
+ rjmp 1f
+ NEG2 r_divL
+1:
+ sbrs r_divdH, 7
+ rjmp 2f
+ NEG2 r_divdL
+2:
+ XCALL __udivuha3
+ sbrs r0, 7 ; negate result if needed
+ ret
+ NEG2 r_quoL
+ ret
+ENDF __divha3
+#endif /* defined (L_divha3) */
+
+#if defined (L_udivuha3)
+DEFUN __udivuha3
+ mov r_quoH, r_divdL
+ mov r_divdL, r_divdH
+ clr r_divdH
+ lsl r_quoH ; shift quotient into carry
+ XJMP __udivuha3_common ; same as fractional after rearrange
+ENDF __udivuha3
+#endif /* defined (L_udivuha3) */
+
+#undef r_divdL
+#undef r_divdH
+#undef r_quoL
+#undef r_quoH
+#undef r_divL
+#undef r_divH
+#undef r_cnt
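
[Aside: __udivuha3 merely repositions the 8.8 dividend one byte up, i.e. multiplies it by 2^8, and falls into the fractional loop, because an 8.8 quotient is just the scaled integer quotient. In C the whole routine reduces to this sketch, with an invented name:]

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t udivuha3_model (uint16_t a, uint16_t b)  /* 8.8 / 8.8 */
    {
      /* (a/2^8) / (b/2^8) = a/b; rescaling into 8.8 multiplies by 2^8. */
      return (uint16_t) (((uint32_t) a << 8) / b);
    }

    int main (void)
    {
      /* 3.0 / 1.5 = 2.0 -> 0x0200 */
      printf ("0x%04x\n", udivuha3_model (0x0300, 0x0180));
      return 0;
    }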
+
+/*******************************************************
+ Fixed Division 16.16 / 16.16
+*******************************************************/
+
+#define r_arg1L 24 /* arg1 gets passed already in place */
+#define r_arg1H 25
+#define r_arg1HL 26
+#define r_arg1HH 27
+#define r_divdL 26 /* dividend Low */
+#define r_divdH 27
+#define r_divdHL 30
+#define r_divdHH 31 /* dividend High */
+#define r_quoL 22 /* quotient Low */
+#define r_quoH 23
+#define r_quoHL 24
+#define r_quoHH 25 /* quotient High */
+#define r_divL 18 /* divisor Low */
+#define r_divH 19
+#define r_divHL 20
+#define r_divHH 21 /* divisor High */
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_divsa3)
+DEFUN __divsa3
+ mov r0, r_arg1HH
+ eor r0, r_divHH
+ sbrs r_divHH, 7
+ rjmp 1f
+ NEG4 r_divL
+1:
+ sbrs r_arg1HH, 7
+ rjmp 2f
+ NEG4 r_arg1L
+2:
+ XCALL __udivusa3
+ sbrs r0, 7 ; negate result if needed
+ ret
+ NEG4 r_quoL
+ ret
+ENDF __divsa3
+#endif /* defined (L_divsa3) */
+
+#if defined (L_udivusa3)
+DEFUN __udivusa3
+ ldi r_divdHL, 32 ; init loop counter
+ mov r_cnt, r_divdHL
+ clr r_divdHL
+ clr r_divdHH
+ wmov r_quoL, r_divdHL
+ lsl r_quoHL ; shift quotient into carry
+ rol r_quoHH
+__udivusa3_loop:
+ rol r_divdL ; shift dividend (with CARRY)
+ rol r_divdH
+ rol r_divdHL
+ rol r_divdHH
+ brcs __udivusa3_ep ; dividend overflow
+ cp r_divdL,r_divL ; compare dividend & divisor
+ cpc r_divdH,r_divH
+ cpc r_divdHL,r_divHL
+ cpc r_divdHH,r_divHH
+ brcc __udivusa3_ep ; dividend >= divisor
+ rol r_quoL ; shift quotient (with CARRY)
+ rjmp __udivusa3_cont
+__udivusa3_ep:
+ sub r_divdL,r_divL ; restore dividend
+ sbc r_divdH,r_divH
+ sbc r_divdHL,r_divHL
+ sbc r_divdHH,r_divHH
+ lsl r_quoL ; shift quotient (without CARRY)
+__udivusa3_cont:
+ rol r_quoH ; shift quotient
+ rol r_quoHL
+ rol r_quoHH
+ dec r_cnt ; decrement loop counter
+ brne __udivusa3_loop
+ com r_quoL ; complement result
+ com r_quoH ; because C flag was complemented in loop
+ com r_quoHL
+ com r_quoHH
+ ret
+ENDF __udivusa3
+#endif /* defined (L_udivusa3) */
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg1HL
+#undef r_arg1HH
+#undef r_divdL
+#undef r_divdH
+#undef r_divdHL
+#undef r_divdHH
+#undef r_quoL
+#undef r_quoH
+#undef r_quoHL
+#undef r_quoHH
+#undef r_divL
+#undef r_divH
+#undef r_divHL
+#undef r_divHH
+#undef r_cnt
diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 95a7d3d4eeb..6b9879ee7d7 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -91,6 +91,35 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.endfunc
.endm
+;; Negate a 2-byte value held in consecutive registers
+.macro NEG2 reg
+ com \reg+1
+ neg \reg
+ sbci \reg+1, -1
+.endm
+
+;; Negate a 4-byte value held in consecutive registers
+.macro NEG4 reg
+ com \reg+3
+ com \reg+2
+ com \reg+1
+.if \reg >= 16
+ neg \reg
+ sbci \reg+1, -1
+ sbci \reg+2, -1
+ sbci \reg+3, -1
+.else
+ com \reg
+ adc \reg, __zero_reg__
+ adc \reg+1, __zero_reg__
+ adc \reg+2, __zero_reg__
+ adc \reg+3, __zero_reg__
+.endif
+.endm
+
+#define exp_lo(N) hlo8 ((N) << 23)
+#define exp_hi(N) hhi8 ((N) << 23)
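
[Aside: exp_lo()/exp_hi() are the two relevant bytes of N << 23, i.e. an offset aimed at the exponent field of an IEEE 754 single, so a subi/sbci pair on the upper result bytes multiplies a nonzero float by 2^N without a libcall. The same trick as a C sketch; it assumes IEEE 754 binary32 and does not handle zero, overflow, or subnormals:]

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    static float scale_by_pow2 (float f, int n)
    {
      uint32_t u;
      memcpy (&u, &f, 4);
      u += (uint32_t) n << 23;  /* the exponent field starts at bit 23 */
      memcpy (&f, &u, 4);
      return f;
    }

    int main (void)
    {
      printf ("%f\n", scale_by_pow2 (3.0f, 4));  /* 48.000000 */
      return 0;
    }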
+
.section .text.libgcc.mul, "ax", @progbits
@@ -126,175 +155,246 @@ ENDF __mulqi3
#endif /* defined (L_mulqi3) */
-#if defined (L_mulqihi3)
-DEFUN __mulqihi3
- clr r25
- sbrc r24, 7
- dec r25
- clr r23
- sbrc r22, 7
- dec r22
- XJMP __mulhi3
-ENDF __mulqihi3:
-#endif /* defined (L_mulqihi3) */
+
+/*******************************************************
+ Widening Multiplication 16 = 8 x 8 without MUL
+ Multiplication 16 x 16 without MUL
+*******************************************************/
+
+#define A0 r22
+#define A1 r23
+#define B0 r24
+#define BB0 r20
+#define B1 r25
+;; Output overlaps input, thus expand result in CC0/1
+#define C0 r24
+#define C1 r25
+#define CC0 __tmp_reg__
+#define CC1 R21
#if defined (L_umulqihi3)
+;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
+;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
+;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
- clr r25
- clr r23
- XJMP __mulhi3
+ clr A1
+ clr B1
+ XJMP __mulhi3
ENDF __umulqihi3
-#endif /* defined (L_umulqihi3) */
+#endif /* L_umulqihi3 */
-/*******************************************************
- Multiplication 16 x 16 without MUL
-*******************************************************/
-#if defined (L_mulhi3)
-#define r_arg1L r24 /* multiplier Low */
-#define r_arg1H r25 /* multiplier High */
-#define r_arg2L r22 /* multiplicand Low */
-#define r_arg2H r23 /* multiplicand High */
-#define r_resL __tmp_reg__ /* result Low */
-#define r_resH r21 /* result High */
+#if defined (L_mulqihi3)
+;;; R25:R24 = (signed int) R22 * (signed int) R24
+;;; (C1:C0) = (signed int) A0 * (signed int) B0
+;;; Clobbers: __tmp_reg__, R20..R23
+DEFUN __mulqihi3
+ ;; Sign-extend B0
+ clr B1
+ sbrc B0, 7
+ com B1
+ ;; The multiplication runs twice as fast if A1 is zero, thus:
+ ;; Zero-extend A0
+ clr A1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Store B0 * sign of A
+ clr BB0
+ sbrc A0, 7
+ mov BB0, B0
+ call __mulhi3
+#else /* have no CALL */
+ ;; Skip sign-extension of A if A >= 0
+ ;; Same size as with the first alternative but avoids errata skip
+ ;; and is faster if A >= 0
+ sbrs A0, 7
+ rjmp __mulhi3
+ ;; If A < 0 store B
+ mov BB0, B0
+ rcall __mulhi3
+#endif /* HAVE_JMP_CALL */
+ ;; 1-extend A after the multiplication
+ sub C1, BB0
+ ret
+ENDF __mulqihi3
+#endif /* L_mulqihi3 */
+#if defined (L_mulhi3)
+;;; R25:R24 = R23:R22 * R25:R24
+;;; (C1:C0) = (A1:A0) * (B1:B0)
+;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __mulhi3
- clr r_resH ; clear result
- clr r_resL ; clear result
-__mulhi3_loop:
- sbrs r_arg1L,0
- rjmp __mulhi3_skip1
- add r_resL,r_arg2L ; result + multiplicand
- adc r_resH,r_arg2H
-__mulhi3_skip1:
- add r_arg2L,r_arg2L ; shift multiplicand
- adc r_arg2H,r_arg2H
-
- cp r_arg2L,__zero_reg__
- cpc r_arg2H,__zero_reg__
- breq __mulhi3_exit ; while multiplicand != 0
-
- lsr r_arg1H ; gets LSB of multiplier
- ror r_arg1L
- sbiw r_arg1L,0
- brne __mulhi3_loop ; exit if multiplier = 0
-__mulhi3_exit:
- mov r_arg1H,r_resH ; result to return register
- mov r_arg1L,r_resL
- ret
-ENDF __mulhi3
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg2L
-#undef r_arg2H
-#undef r_resL
-#undef r_resH
+ ;; Clear result
+ clr CC0
+ clr CC1
+ rjmp 3f
+1:
+ ;; Bit n of A is 1 --> C += B << n
+ add CC0, B0
+ adc CC1, B1
+2:
+ lsl B0
+ rol B1
+3:
+ ;; If B == 0 we are ready
+ sbiw B0, 0
+ breq 9f
+
+ ;; Carry = n-th bit of A
+ lsr A1
+ ror A0
+ ;; If bit n of A is set, then go add B * 2^n to C
+ brcs 1b
+
+ ;; Carry = 0 --> The ROR above acts like CP A0, 0
+ ;; Thus, it is sufficient to CPC the high part to test A against 0
+ cpc A1, __zero_reg__
+ ;; Only proceed if A != 0
+ brne 2b
+9:
+ ;; Move Result into place
+ mov C0, CC0
+ mov C1, CC1
+ ret
+ENDF __mulhi3
+#endif /* L_mulhi3 */
-#endif /* defined (L_mulhi3) */
+#undef A0
+#undef A1
+#undef B0
+#undef BB0
+#undef B1
+#undef C0
+#undef C1
+#undef CC0
+#undef CC1
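
[Aside: the rewritten __mulhi3 is the classic shift-and-add loop: it consumes A one bit at a time, adds the correspondingly shifted B, and exits as soon as either operand has no bits left. Its C equivalent, modulo the register juggling, with an invented name:]

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t mulhi3_model (uint16_t a, uint16_t b)
    {
      uint16_t c = 0;
      while (a && b)          /* covers both early exits of the asm loop */
        {
          if (a & 1)
            c += b;           /* bit n of A set -> C += B << n */
          a >>= 1;
          b <<= 1;            /* bits shifted past bit 15 are discarded, as in asm */
        }
      return c;
    }

    int main (void)
    {
      printf ("%u\n", mulhi3_model (1234, 56));  /* 69104 mod 65536 = 3568 */
      return 0;
    }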
+
+
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define CC0 26
+#define CC1 CC0+1
+#define CC2 30
+#define CC3 CC2+1
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
/*******************************************************
Widening Multiplication 32 = 16 x 16 without MUL
*******************************************************/
-#if defined (L_mulhisi3)
-DEFUN __mulhisi3
-;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- sbrc r23, 7
- dec r24
- mov r25, r24
- clr r20
- sbrc r19, 7
- dec r20
- mov r21, r20
- XJMP __mulsi3
-ENDF __mulhisi3
-#endif /* defined (L_mulhisi3) */
-
#if defined (L_umulhisi3)
DEFUN __umulhisi3
-;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- clr r25
- mov_l r20, r24
- mov_h r21, r25
+ wmov B0, 24
+ ;; Zero-extend B
+ clr B2
+ clr B3
+ ;; Zero-extend A
+ wmov A2, B2
XJMP __mulsi3
ENDF __umulhisi3
-#endif /* defined (L_umulhisi3) */
+#endif /* L_umulhisi3 */
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+ wmov B0, 24
+ ;; Sign-extend B
+ lsl r25
+ sbc B2, B2
+ mov B3, B2
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+ ;; Sign-extend A
+ clr A2
+ sbrc A1, 7
+ com A2
+ mov A3, A2
+ XJMP __mulsi3
+#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
+ ;; Zero-extend A and __mulsi3 will run at least twice as fast
+ ;; compared to a sign-extended A.
+ clr A2
+ clr A3
+ sbrs A1, 7
+ XJMP __mulsi3
+ ;; If A < 0, fold the B * 0xffff.... term in up front by
+ ;; initializing the high part of the result CC with -B.
+ wmov CC2, A2
+ sub CC2, B0
+ sbc CC3, B1
+ XJMP __mulsi3_helper
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
-#if defined (L_mulsi3)
/*******************************************************
Multiplication 32 x 32 without MUL
*******************************************************/
-#define r_arg1L r22 /* multiplier Low */
-#define r_arg1H r23
-#define r_arg1HL r24
-#define r_arg1HH r25 /* multiplier High */
-
-#define r_arg2L r18 /* multiplicand Low */
-#define r_arg2H r19
-#define r_arg2HL r20
-#define r_arg2HH r21 /* multiplicand High */
-
-#define r_resL r26 /* result Low */
-#define r_resH r27
-#define r_resHL r30
-#define r_resHH r31 /* result High */
+#if defined (L_mulsi3)
DEFUN __mulsi3
- clr r_resHH ; clear result
- clr r_resHL ; clear result
- clr r_resH ; clear result
- clr r_resL ; clear result
-__mulsi3_loop:
- sbrs r_arg1L,0
- rjmp __mulsi3_skip1
- add r_resL,r_arg2L ; result + multiplicand
- adc r_resH,r_arg2H
- adc r_resHL,r_arg2HL
- adc r_resHH,r_arg2HH
-__mulsi3_skip1:
- add r_arg2L,r_arg2L ; shift multiplicand
- adc r_arg2H,r_arg2H
- adc r_arg2HL,r_arg2HL
- adc r_arg2HH,r_arg2HH
-
- lsr r_arg1HH ; gets LSB of multiplier
- ror r_arg1HL
- ror r_arg1H
- ror r_arg1L
- brne __mulsi3_loop
- sbiw r_arg1HL,0
- cpc r_arg1H,r_arg1L
- brne __mulsi3_loop ; exit if multiplier = 0
-__mulsi3_exit:
- mov_h r_arg1HH,r_resHH ; result to return register
- mov_l r_arg1HL,r_resHL
- mov_h r_arg1H,r_resH
- mov_l r_arg1L,r_resL
- ret
-ENDF __mulsi3
+ ;; Clear result
+ clr CC2
+ clr CC3
+ ;; FALLTHRU
+ENDF __mulsi3
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg1HL
-#undef r_arg1HH
-
-#undef r_arg2L
-#undef r_arg2H
-#undef r_arg2HL
-#undef r_arg2HH
-
-#undef r_resL
-#undef r_resH
-#undef r_resHL
-#undef r_resHH
+DEFUN __mulsi3_helper
+ clr CC0
+ clr CC1
+ rjmp 3f
+
+1: ;; If bit n of A is set, then add B * 2^n to the result in CC
+ ;; CC += B
+ add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3
+
+2: ;; B <<= 1
+ lsl B0 $ rol B1 $ rol B2 $ rol B3
+
+3: ;; A >>= 1: Carry = n-th bit of A
+ lsr A3 $ ror A2 $ ror A1 $ ror A0
+
+ brcs 1b
+ ;; Only continue if A != 0
+ sbci A1, 0
+ brne 2b
+ sbiw A2, 0
+ brne 2b
+
+ ;; All bits of A are consumed: Copy result to return register C
+ wmov C0, CC0
+ wmov C2, CC2
+ ret
+ENDF __mulsi3_helper
+#endif /* L_mulsi3 */
-#endif /* defined (L_mulsi3) */
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -316,7 +416,7 @@ ENDF __mulsi3
#define C3 C0+3
/*******************************************************
- Widening Multiplication 32 = 16 x 16
+ Widening Multiplication 32 = 16 x 16 with MUL
*******************************************************/
#if defined (L_mulhisi3)
@@ -364,7 +464,17 @@ DEFUN __umulhisi3
mul A1, B1
movw C2, r0
mul A0, B1
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; This function is used by many other routines, often multiple times.
+ ;; Therefore, if the flash size is not too limited, avoid the RCALL
+ ;; and invest 6 bytes to speed things up.
+ add C1, r0
+ adc C2, r1
+ clr __zero_reg__
+ adc C3, __zero_reg__
+#else
rcall 1f
+#endif
mul A1, B0
1: add C1, r0
adc C2, r1
@@ -375,7 +485,7 @@ ENDF __umulhisi3
#endif /* L_umulhisi3 */
/*******************************************************
- Widening Multiplication 32 = 16 x 32
+ Widening Multiplication 32 = 16 x 32 with MUL
*******************************************************/
#if defined (L_mulshisi3)
@@ -425,7 +535,7 @@ ENDF __muluhisi3
#endif /* L_muluhisi3 */
/*******************************************************
- Multiplication 32 x 32
+ Multiplication 32 x 32 with MUL
*******************************************************/
#if defined (L_mulsi3)
@@ -468,7 +578,7 @@ ENDF __mulsi3
#endif /* __AVR_HAVE_MUL__ */
/*******************************************************
- Multiplication 24 x 24
+ Multiplication 24 x 24 with MUL
*******************************************************/
#if defined (L_mulpsi3)
@@ -1247,6 +1357,19 @@ __divmodsi4_exit:
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */
+#undef r_remHH
+#undef r_remHL
+#undef r_remH
+#undef r_remL
+#undef r_arg1HH
+#undef r_arg1HL
+#undef r_arg1H
+#undef r_arg1L
+#undef r_arg2HH
+#undef r_arg2HL
+#undef r_arg2H
+#undef r_arg2L
+#undef r_cnt
/*******************************************************
Division 64 / 64
@@ -2757,9 +2880,7 @@ DEFUN __fmulsu_exit
XJMP __fmul
1: XCALL __fmul
;; C = -C iff A0.7 = 1
- com C1
- neg C0
- sbci C1, -1
+ NEG2 C0
ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
@@ -2794,3 +2915,5 @@ ENDF __fmul
#undef B1
#undef C0
#undef C1
+
+#include "lib1funcs-fixed.S"
diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr
index 43caa94ca2a..6f783cd9d52 100644
--- a/libgcc/config/avr/t-avr
+++ b/libgcc/config/avr/t-avr
@@ -2,6 +2,7 @@ LIB1ASMSRC = avr/lib1funcs.S
LIB1ASMFUNCS = \
_mulqi3 \
_mulhi3 \
+ _mulqihi3 _umulqihi3 \
_mulpsi3 _mulsqipsi3 \
_mulhisi3 \
_umulhisi3 \
@@ -55,6 +56,24 @@ LIB1ASMFUNCS = \
_cmpdi2 _cmpdi2_s8 \
_fmul _fmuls _fmulsu
+# Fixed point routines in avr/lib1funcs-fixed.S
+LIB1ASMFUNCS += \
+ _fractqqsf _fractuqqsf \
+ _fracthqsf _fractuhqsf _fracthasf _fractuhasf \
+ _fractsasf _fractusasf _fractsqsf _fractusqsf \
+ \
+ _fractsfqq _fractsfuqq \
+ _fractsfhq _fractsfuhq _fractsfha _fractsfuha \
+ _fractsfsa _fractsfusa \
+ _mulqq3 \
+ _mulhq3 _muluhq3 \
+ _mulha3 _muluha3 _muluha3_round \
+ _mulsa3 _mulusa3 \
+ _divqq3 _udivuqq3 \
+ _divhq3 _udivuhq3 \
+ _divha3 _udivuha3 \
+ _divsa3 _udivusa3
+
LIB2FUNCS_EXCLUDE = \
_moddi3 _umoddi3 \
_clz
@@ -81,3 +100,49 @@ libgcc-objects += $(patsubst %,%$(objext),$(hiintfuncs16))
ifeq ($(enable_shared),yes)
libgcc-s-objects += $(patsubst %,%_s$(objext),$(hiintfuncs16))
endif
+
+
+# Filter out supported conversions from fixed-bit.c
+
+conv_XY=$(conv)$(mode1)$(mode2)
+conv_X=$(conv)$(mode)
+
+# Conversions supported by the compiler
+
+convf_modes = QI UQI QQ UQQ \
+ HI UHI HQ UHQ HA UHA \
+ SI USI SQ USQ SA USA \
+ DI UDI DQ UDQ DA UDA \
+ TI UTI TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract _fractuns,\
+ $(foreach mode1,$(convf_modes),\
+ $(foreach mode2,$(convf_modes),$(conv_XY))))
+
+# Conversions supported by lib1funcs-fixed.S
+
+conv_to_sf_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA
+conv_from_sf_modes = QQ UQQ HQ UHQ HA UHA SA USA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract, \
+ $(foreach mode1,$(conv_to_sf_modes), \
+ $(foreach mode2,SF,$(conv_XY))))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_fract,\
+ $(foreach mode1,SF,\
+ $(foreach mode2,$(conv_from_sf_modes),$(conv_XY))))
+
+# Arithmetic supported by the compiler
+
+allfix_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA DA UDA DQ UDQ TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_add _sub,\
+ $(foreach mode,$(allfix_modes),$(conv_X)3))
+
+LIB2FUNCS_EXCLUDE += \
+ $(foreach conv,_lshr _ashl _ashr _cmp,\
+ $(foreach mode,$(allfix_modes),$(conv_X)))