author    | gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-24 12:42:48 +0000
committer | gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-24 12:42:48 +0000
commit    | 017c5b989ac3e3c65e0af360accad2f3ef281fad (patch)
tree      | 9180eac44e1ace2c0794f565cff49b3847b05c2b /libgcc
parent    | 5d34a30f668dc6540591e50a58fdddd143842f62 (diff)
download  | gcc-017c5b989ac3e3c65e0af360accad2f3ef281fad.tar.gz
libgcc/
PR target/54222
* config/avr/lib1funcs-fixed.S: New file.
* config/avr/lib1funcs.S: Include it. Undefine some divmodsi
after they are used.
(neg2, neg4): New macros.
(__mulqihi3,__umulqihi3,__mulhi3): Rewrite non-MUL variants.
(__mulhisi3,__umulhisi3,__mulsi3): Rewrite non-MUL variants.
(__umulhisi3): Speed up MUL variant if there is enough flash.
* config/avr/avr-lib.h (TA, UTA): Adjust according to gcc's
avr-modes.def.
* config/avr/t-avr (LIB1ASMFUNCS): Add: _fractqqsf, _fractuqqsf,
_fracthqsf, _fractuhqsf, _fracthasf, _fractuhasf, _fractsasf,
_fractusasf, _fractsfqq, _fractsfuqq, _fractsfhq, _fractsfuhq,
_fractsfha, _fractsfsa, _mulqq3, _muluqq3, _mulhq3, _muluhq3,
_mulha3, _muluha3, _mulsa3, _mulusa3, _divqq3, _udivuqq3, _divhq3,
_udivuhq3, _divha3, _udivuha3, _divsa3, _udivusa3.
(LIB2FUNCS_EXCLUDE): Add supported functions.
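[For illustration only, not part of the commit: the routines listed above implement ISO/IEC TR 18037 fixed-point arithmetic. A minimal C sketch of code that would link against them when built with a fixed-point-enabled avr-gcc; it assumes the usual keyword-to-mode mapping (_Accum to SA, s16.15; _Fract to HQ, s.15), and the function names are ours, not the patch's:]

    /* Sketch only; assumes avr-gcc maps _Accum to SA (s16.15) and
       _Fract to HQ (s.15).  Names below are illustrative.  */

    _Accum scale(_Accum x, _Accum gain)
    {
        return x * gain;        /* expected to lower to __mulsa3 */
    }

    float accum_to_float(_Accum x)
    {
        return (float) x;       /* expected to lower to __fractsasf */
    }

    _Fract float_to_fract(float f)
    {
        return (_Fract) f;      /* expected to lower to __fractsfhq */
    }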
gcc/
PR target/54222
* avr-modes.def (HA, SA, DA, TA, UTA): Adjust modes.
* avr/avr-fixed.md: New file.
* avr/avr.md: Include it.
(cc): Add: minus.
(adjust_len): Add: minus, minus64, ufract, sfract.
(ALL1, ALL2, ALL4, ORDERED234): New mode iterators.
(MOVMODE): Add: QQ, UQQ, HQ, UHQ, HA, UHA, SQ, USQ, SA, USA.
(MPUSH): Add: HQ, UHQ, HA, UHA, SQ, USQ, SA, USA.
(pushqi1, xload8_A, xload_8, movqi_insn, *reload_inqi, addqi3,
subqi3, ashlqi3, *ashlqi3, ashrqi3, lshrqi3, *lshrqi3, *cmpqi,
cbranchqi4, *cpse.eq): Generalize to handle all 8-bit modes in ALL1.
(*movhi, reload_inhi, addhi3, *addhi3, addhi3_clobber, subhi3,
ashlhi3, *ashlhi3_const, ashrhi3, *ashrhi3_const, lshrhi3,
*lshrhi3_const, *cmphi, cbranchhi4): Generalize to handle all
16-bit modes in ALL2.
(subhi3, casesi, strlenhi): Add clobber when expanding minus:HI.
(*movsi, *reload_insi, addsi3, subsi3, ashlsi3, *ashlsi3_const,
ashrsi3, *ashrhi3_const, *ashrsi3_const, lshrsi3, *lshrsi3_const,
*reversed_tstsi, *cmpsi, cbranchsi4): Generalize to handle all
32-bit modes in ALL4.
* avr-dimode.md (ALL8): New mode iterator.
(adddi3, adddi3_insn, adddi3_const_insn, subdi3, subdi3_insn,
subdi3_const_insn, cbranchdi4, compare_di2,
compare_const_di2, ashrdi3, lshrdi3, rotldi3, ashldi3_insn,
ashrdi3_insn, lshrdi3_insn, rotldi3_insn): Generalize to handle
all 64-bit modes in ALL8.
* config/avr/avr-protos.h (avr_to_int_mode): New prototype.
(avr_out_fract, avr_out_minus, avr_out_minus64): New prototypes.
* config/avr/avr.c (TARGET_FIXED_POINT_SUPPORTED_P): Define to...
(avr_fixed_point_supported_p): ...this new static function.
(TARGET_BUILD_BUILTIN_VA_LIST): Define to...
(avr_build_builtin_va_list): ...this new static function.
(avr_adjust_type_node): New static function.
(avr_scalar_mode_supported_p): Allow if ALL_FIXED_POINT_MODE_P.
(avr_builtin_setjmp_frame_value): Use gen_subhi3 and return new
pseudo instead of gen_rtx_MINUS.
(avr_print_operand, avr_operand_rtx_cost): Handle: CONST_FIXED.
(notice_update_cc): Handle: CC_MINUS.
(output_movqi): Generalize to handle respective fixed-point modes.
(output_movhi, output_movsisf, avr_2word_insn_p): Ditto.
(avr_out_compare, avr_out_plus_1): Also handle fixed-point modes.
(avr_assemble_integer): Ditto.
(output_reload_in_const, output_reload_insisf): Ditto.
(avr_compare_pattern): Skip all modes > 4 bytes.
(avr_2word_insn_p): Skip movuqq_insn, movqq_insn.
(avr_out_fract, avr_out_minus, avr_out_minus64): New functions.
(avr_to_int_mode): New function.
(adjust_insn_length): Handle: ADJUST_LEN_SFRACT,
ADJUST_LEN_UFRACT, ADJUST_LEN_MINUS, ADJUST_LEN_MINUS64.
* config/avr/predicates.md (const0_operand): Allow const_fixed.
(const_operand, const_or_immediate_operand): New.
(nonmemory_or_const_operand): New.
* config/avr/constraints.md (Ynn, Y00, Y01, Y02, Ym1, Ym2, YIJ):
New constraints.
* config/avr/avr.h (LONG_LONG_ACCUM_TYPE_SIZE): Define.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@190644 138bc75d-0d04-0410-961f-82ee72b054a4
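[Again for illustration, not part of the commit: on the gcc/ side, the ALL8 iterator in avr-dimode.md lets the existing 64-bit add/sub patterns serve the 8-byte fixed-point modes. A hedged C sketch, assuming long long _Accum maps to TA mode (8 bytes per the adjusted avr-modes.def):]

    /* Sketch only; assumes long long _Accum is TA mode (8 bytes) on AVR. */

    long long _Accum integrate(long long _Accum acc, long long _Accum sample)
    {
        /* Addition in an 8-byte fixed-point mode can reuse the adddi3
           insns now generalized over the ALL8 mode iterator.  */
        return acc + sample;
    }

    long long _Accum drain(long long _Accum acc, long long _Accum loss)
    {
        /* Subtraction likewise goes through the generalized subdi3.  */
        return acc - loss;
    }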
Diffstat (limited to 'libgcc')
-rw-r--r-- | libgcc/ChangeLog                    |  20
-rw-r--r-- | libgcc/config/avr/avr-lib.h         |  76
-rw-r--r-- | libgcc/config/avr/lib1funcs-fixed.S | 874
-rw-r--r-- | libgcc/config/avr/lib1funcs.S       | 423
-rw-r--r-- | libgcc/config/avr/t-avr             |  65
5 files changed, 1308 insertions, 150 deletions
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 33ad5185479..60f19491d76 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,23 @@
+2012-08-24  Georg-Johann Lay  <avr@gjlay.de>
+
+	PR target/54222
+	* config/avr/lib1funcs-fixed.S: New file.
+	* config/avr/lib1funcs.S: Include it. Undefine some divmodsi
+	after they are used.
+	(neg2, neg4): New macros.
+	(__mulqihi3,__umulqihi3,__mulhi3): Rewrite non-MUL variants.
+	(__mulhisi3,__umulhisi3,__mulsi3): Rewrite non-MUL variants.
+	(__umulhisi3): Speed up MUL variant if there is enough flash.
+	* config/avr/avr-lib.h (TA, UTA): Adjust according to gcc's
+	avr-modes.def.
+	* config/avr/t-avr (LIB1ASMFUNCS): Add: _fractqqsf, _fractuqqsf,
+	_fracthqsf, _fractuhqsf, _fracthasf, _fractuhasf, _fractsasf,
+	_fractusasf, _fractsfqq, _fractsfuqq, _fractsfhq, _fractsfuhq,
+	_fractsfha, _fractsfsa, _mulqq3, _muluqq3, _mulhq3, _muluhq3,
+	_mulha3, _muluha3, _mulsa3, _mulusa3, _divqq3, _udivuqq3, _divhq3,
+	_udivuhq3, _divha3, _udivuha3, _divsa3, _udivusa3.
+	(LIB2FUNCS_EXCLUDE): Add supported functions.
+
 2012-08-22  Georg-Johann Lay  <avr@gjlay.de>
 
 	* Makefile.in (fixed-funcs,fixed-conv-funcs): filter-out
diff --git a/libgcc/config/avr/avr-lib.h b/libgcc/config/avr/avr-lib.h
index daca4d81f9a..66082eb8a48 100644
--- a/libgcc/config/avr/avr-lib.h
+++ b/libgcc/config/avr/avr-lib.h
@@ -4,3 +4,79 @@
 #define DI SI
 typedef int QItype __attribute__ ((mode (QI)));
 #endif
+
+/* fixed-bit.h does not define functions for TA and UTA because
+   that part is wrapped in #if MIN_UNITS_PER_WORD > 4.
+   This would lead to empty functions for TA and UTA.
+   Thus, supply appropriate defines as if HAVE_[U]TA == 1.
+   #define HAVE_[U]TA 1 won't work because avr-modes.def
+   uses ADJUST_BYTESIZE(TA,8) and fixed-bit.h is not generic enough
+   to arrange for such changes of the mode size.  */
+
+typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA)));
+
+#if defined (UTA_MODE)
+#define FIXED_SIZE      8       /* in bytes */
+#define INT_C_TYPE      UDItype
+#define UINT_C_TYPE     UDItype
+#define HINT_C_TYPE     USItype
+#define HUINT_C_TYPE    USItype
+#define MODE_NAME       UTA
+#define MODE_NAME_S     uta
+#define MODE_UNSIGNED   1
+#endif
+
+#if defined (FROM_UTA)
+#define FROM_TYPE               4       /* ID for fixed-point */
+#define FROM_MODE_NAME          UTA
+#define FROM_MODE_NAME_S        uta
+#define FROM_INT_C_TYPE         UDItype
+#define FROM_SINT_C_TYPE        DItype
+#define FROM_UINT_C_TYPE        UDItype
+#define FROM_MODE_UNSIGNED      1
+#define FROM_FIXED_SIZE         8       /* in bytes */
+#elif defined (TO_UTA)
+#define TO_TYPE                 4       /* ID for fixed-point */
+#define TO_MODE_NAME            UTA
+#define TO_MODE_NAME_S          uta
+#define TO_INT_C_TYPE           UDItype
+#define TO_SINT_C_TYPE          DItype
+#define TO_UINT_C_TYPE          UDItype
+#define TO_MODE_UNSIGNED        1
+#define TO_FIXED_SIZE           8       /* in bytes */
+#endif
+
+/* Same for TAmode */
+
+typedef _Fract TAtype __attribute__ ((mode (TA)));
+
+#if defined (TA_MODE)
+#define FIXED_SIZE      8       /* in bytes */
+#define INT_C_TYPE      DItype
+#define UINT_C_TYPE     UDItype
+#define HINT_C_TYPE     SItype
+#define HUINT_C_TYPE    USItype
+#define MODE_NAME       TA
+#define MODE_NAME_S     ta
+#define MODE_UNSIGNED   0
+#endif
+
+#if defined (FROM_TA)
+#define FROM_TYPE               4       /* ID for fixed-point */
+#define FROM_MODE_NAME          TA
+#define FROM_MODE_NAME_S        ta
+#define FROM_INT_C_TYPE         DItype
+#define FROM_SINT_C_TYPE        DItype
+#define FROM_UINT_C_TYPE        UDItype
+#define FROM_MODE_UNSIGNED      0
+#define FROM_FIXED_SIZE         8       /* in bytes */
+#elif defined (TO_TA)
+#define TO_TYPE                 4       /* ID for fixed-point */
+#define TO_MODE_NAME            TA
+#define TO_MODE_NAME_S          ta
+#define TO_INT_C_TYPE           DItype
+#define TO_SINT_C_TYPE          DItype
+#define TO_UINT_C_TYPE          UDItype
+#define TO_MODE_UNSIGNED        0
+#define TO_FIXED_SIZE           8       /* in bytes */
+#endif
diff --git a/libgcc/config/avr/lib1funcs-fixed.S b/libgcc/config/avr/lib1funcs-fixed.S
new file mode 100644
index 00000000000..c1aff53d5fd
--- /dev/null
+++ b/libgcc/config/avr/lib1funcs-fixed.S
@@ -0,0 +1,874 @@
+/*   -*- Mode: Asm -*-  */
+;;    Copyright (C) 2012
+;;    Free Software Foundation, Inc.
+;;    Contributed by Sean D'Epagnier  (sean@depagnier.com)
+;;                   Georg-Johann Lay (avr@gjlay.de)
+
+;; This file is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by the
+;; Free Software Foundation; either version 3, or (at your option) any
+;; later version.
+
+;; In addition to the permissions in the GNU General Public License, the
+;; Free Software Foundation gives you unlimited permission to link the
+;; compiled version of this file into combinations with other programs,
+;; and to distribute those combinations without any restriction coming
+;; from the use of this file.  (The General Public License restrictions
+;; do apply in other respects; for example, they cover modification of
+;; the file, and distribution when not linked into a combine
+;; executable.)
+
+;; This file is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Fixed point library routines for AVR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.section .text.libgcc.fixed, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions to float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractqqsf)
+DEFUN __fractqqsf
+    ;; Move in place for SA -> SF conversion
+    clr r22
+    mov r23, r24
+    lsl r23
+    ;; Sign-extend
+    sbc r24, r24
+    mov r25, r24
+    XJMP __fractsasf
+ENDF __fractqqsf
+#endif  /* L_fractqqsf */
+
+#if defined (L_fractuqqsf)
+DEFUN __fractuqqsf
+    ;; Move in place for USA -> SF conversion
+    clr r22
+    mov r23, r24
+    ;; Zero-extend
+    clr r24
+    clr r25
+    XJMP __fractusasf
+ENDF __fractuqqsf
+#endif  /* L_fractuqqsf */
+
+#if defined (L_fracthqsf)
+DEFUN __fracthqsf
+    ;; Move in place for SA -> SF conversion
+    wmov 22, 24
+    lsl r22
+    rol r23
+    ;; Sign-extend
+    sbc r24, r24
+    mov r25, r24
+    XJMP __fractsasf
+ENDF __fracthqsf
+#endif  /* L_fracthqsf */
+
+#if defined (L_fractuhqsf)
+DEFUN __fractuhqsf
+    ;; Move in place for USA -> SF conversion
+    wmov 22, 24
+    ;; Zero-extend
+    clr r24
+    clr r25
+    XJMP __fractusasf
+ENDF __fractuhqsf
+#endif  /* L_fractuhqsf */
+
+#if defined (L_fracthasf)
+DEFUN __fracthasf
+    ;; Move in place for SA -> SF conversion
+    clr r22
+    mov r23, r24
+    mov r24, r25
+    ;; Sign-extend
+    lsl r25
+    sbc r25, r25
+    XJMP __fractsasf
+ENDF __fracthasf
+#endif  /* L_fracthasf */
+
+#if defined (L_fractuhasf)
+DEFUN __fractuhasf
+    ;; Move in place for USA -> SF conversion
+    clr r22
+    mov r23, r24
+    mov r24, r25
+    ;; Zero-extend
+    clr r25
+    XJMP __fractusasf
+ENDF __fractuhasf
+#endif  /* L_fractuhasf */
+
+
+#if defined (L_fractsqsf)
+DEFUN __fractsqsf
+    XCALL __floatsisf
+    ;; Divide non-zero results by 2^31 to move the
+    ;; decimal point into place
+    tst r25
+    breq 0f
+    subi r24, exp_lo (31)
+    sbci r25, exp_hi (31)
+0:  ret
+ENDF __fractsqsf
+#endif  /* L_fractsqsf */
+
+#if defined (L_fractusqsf)
+DEFUN __fractusqsf
+    XCALL __floatunsisf
+    ;; Divide non-zero results by 2^32 to move the
+    ;; decimal point into place
+    cpse r25, __zero_reg__
+    subi r25, exp_hi (32)
+    ret
+ENDF __fractusqsf
+#endif  /* L_fractusqsf */
+
+#if defined (L_fractsasf)
+DEFUN __fractsasf
+    XCALL __floatsisf
+    ;; Divide non-zero results by 2^16 to move the
+    ;; decimal point into place
+    cpse r25, __zero_reg__
+    subi r25, exp_hi (16)
+    ret
+ENDF __fractsasf
+#endif  /* L_fractsasf */
+
+#if defined (L_fractusasf)
+DEFUN __fractusasf
+    XCALL __floatunsisf
+    ;; Divide non-zero results by 2^16 to move the
+    ;; decimal point into place
+    cpse r25, __zero_reg__
+    subi r25, exp_hi (16)
+    ret
+ENDF __fractusasf
+#endif  /* L_fractusasf */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions from float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractsfqq)
+DEFUN __fractsfqq
+    ;; Multiply with 2^{24+7} to get a QQ result in r25
+    subi r24, exp_lo (-31)
+    sbci r25, exp_hi (-31)
+    XCALL __fixsfsi
+    mov r24, r25
+    ret
+ENDF __fractsfqq
+#endif  /* L_fractsfqq */
+
+#if defined (L_fractsfuqq)
+DEFUN __fractsfuqq
+    ;; Multiply with 2^{24+8} to get a UQQ result in r25
+    subi r25, exp_hi (-32)
+    XCALL __fixunssfsi
+    mov r24, r25
+    ret
+ENDF __fractsfuqq
+#endif  /* L_fractsfuqq */
+
+#if defined (L_fractsfha)
+DEFUN __fractsfha
+    ;; Multiply with 2^24 to get a HA result in r25:r24
+    subi r25, exp_hi (-24)
+    XJMP __fixsfsi
+ENDF __fractsfha
+#endif  /* L_fractsfha */
+
+#if defined (L_fractsfuha)
+DEFUN __fractsfuha
+    ;; Multiply with 2^24 to get a UHA result in r25:r24
+    subi r25, exp_hi (-24)
+    XJMP __fixunssfsi
+ENDF __fractsfuha
+#endif  /* L_fractsfuha */
+
+#if defined (L_fractsfhq)
+DEFUN __fractsfsq
+ENDF __fractsfsq
+
+DEFUN __fractsfhq
+    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
+    ;; resp. with 2^31 to get a SQ result in r25:r22
+    subi r24, exp_lo (-31)
+    sbci r25, exp_hi (-31)
+    XJMP __fixsfsi
+ENDF __fractsfhq
+#endif  /* L_fractsfhq */
+
+#if defined (L_fractsfuhq)
+DEFUN __fractsfusq
+ENDF __fractsfusq
+
+DEFUN __fractsfuhq
+    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
+    ;; resp. with 2^32 to get a USQ result in r25:r22
+    subi r25, exp_hi (-32)
+    XJMP __fixunssfsi
+ENDF __fractsfuhq
+#endif  /* L_fractsfuhq */
+
+#if defined (L_fractsfsa)
+DEFUN __fractsfsa
+    ;; Multiply with 2^16 to get a SA result in r25:r22
+    subi r25, exp_hi (-16)
+    XJMP __fixsfsi
+ENDF __fractsfsa
+#endif  /* L_fractsfsa */
+
+#if defined (L_fractsfusa)
+DEFUN __fractsfusa
+    ;; Multiply with 2^16 to get a USA result in r25:r22
+    subi r25, exp_hi (-16)
+    XJMP __fixunssfsi
+ENDF __fractsfusa
+#endif  /* L_fractsfusa */
+
+
+;; For multiplication the functions here are called directly from
+;; avr-fixed.md instead of using the standard libcall mechanisms.
+;; This can make better code because GCC knows exactly which
+;; of the call-used registers (not all of them) are clobbered.  */
+
+/*******************************************************
+    Fractional Multiplication  8 x 8  without MUL
+*******************************************************/
+
+#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
+;;; R23 = R24 * R25
+;;; Clobbers: __tmp_reg__, R22, R24, R25
+;;; Rounding: ???
+DEFUN __mulqq3
+    XCALL __fmuls
+    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
+    ;; The only input that can produce  -1  is  (-1)^2.
+    dec r23
+    brvs 0f
+    inc r23
+0:  ret
+ENDF __mulqq3
+#endif  /* L_mulqq3 && ! HAVE_MUL */
+
+/*******************************************************
+    Fractional Multiply  .16 x .16  with and without MUL
+*******************************************************/
+
+#if defined (L_mulhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
+DEFUN __mulhq3
+    XCALL __mulhisi3
+    ;; Shift result into place
+    lsl r23
+    rol r24
+    rol r25
+    brvs 1f
+    ;; Round
+    sbrc r23, 7
+    adiw r24, 1
+    ret
+1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
+    ldi r24, lo8 (0x7fff)
+    ldi r25, hi8 (0x7fff)
+    ret
+ENDF __mulhq3
+#endif  /* defined (L_mulhq3) */
+
+#if defined (L_muluhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
+DEFUN __muluhq3
+    XCALL __umulhisi3
+    ;; Round
+    sbrc r23, 7
+    adiw r24, 1
+    ret
+ENDF __muluhq3
+#endif  /* L_muluhq3 */
+
+
+/*******************************************************
+    Fixed Multiply  8.8 x 8.8  with and without MUL
+*******************************************************/
+
+#if defined (L_mulha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
+DEFUN __mulha3
+    XCALL __mulhisi3
+    XJMP __muluha3_round
+ENDF __mulha3
+#endif  /* L_mulha3 */
+
+#if defined (L_muluha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
+DEFUN __muluha3
+    XCALL __umulhisi3
+    XJMP __muluha3_round
+ENDF __muluha3
+#endif  /* L_muluha3 */
+
+#if defined (L_muluha3_round)
+DEFUN __muluha3_round
+    ;; Shift result into place
+    mov r25, r24
+    mov r24, r23
+    ;; Round
+    sbrc r22, 7
+    adiw r24, 1
+    ret
+ENDF __muluha3_round
+#endif  /* L_muluha3_round */
+
+
+/*******************************************************
+    Fixed Multiplication  16.16 x 16.16
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Multiplier
+#define A0  16
+#define A1  A0+1
+#define A2  A1+1
+#define A3  A2+1
+
+;; Multiplicand
+#define B0  20
+#define B1  B0+1
+#define B2  B1+1
+#define B3  B2+1
+
+;; Result
+#define C0  24
+#define C1  C0+1
+#define C2  C1+1
+#define C3  C2+1
+
+#if defined (L_mulusa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__
+;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
+DEFUN __mulusa3
+    ;; Some of the MUL instructions have LSBs outside the result.
+    ;; Don't ignore these LSBs in order to tame rounding error.
+    ;; Use C2/C3 for these LSBs.
+
+    clr C0
+    clr C1
+    mul A0, B0  $  movw C2, r0
+
+    mul A1, B0  $  add C3, r0  $  adc C0, r1
+    mul A0, B1  $  add C3, r0  $  adc C0, r1  $  rol C1
+
+    ;; Round
+    sbrc C3, 7
+    adiw C0, 1
+
+    ;; The following MULs don't have LSBs outside the result.
+    ;; C2/C3 is the high part.
+
+    mul A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
+    mul A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
+    mul A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
+    neg C2
+
+    mul A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
+    mul A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
+    mul A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
+    mul A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
+    neg C3
+
+    mul A1, B3  $  add C2, r0  $  adc C3, r1
+    mul A2, B2  $  add C2, r0  $  adc C3, r1
+    mul A3, B1  $  add C2, r0  $  adc C3, r1
+
+    mul A2, B3  $  add C3, r0
+    mul A3, B2  $  add C3, r0
+
+    clr __zero_reg__
+    ret
+ENDF __mulusa3
+#endif /* L_mulusa3 */
+
+#if defined (L_mulsa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__
+;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
+DEFUN __mulsa3
+    XCALL __mulusa3
+    tst B3
+    brpl 1f
+    sub C2, A0
+    sbc C3, A1
+1:  sbrs A3, 7
+    ret
+    sub C2, B0
+    sbc C3, B1
+    ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#else /* __AVR_HAVE_MUL__ */
+
+#define A0  18
+#define A1  A0+1
+#define A2  A0+2
+#define A3  A0+3
+
+#define B0  22
+#define B1  B0+1
+#define B2  B0+2
+#define B3  B0+3
+
+#define C0  22
+#define C1  C0+1
+#define C2  C0+2
+#define C3  C0+3
+
+;; __tmp_reg__
+#define CC0  0
+;; __zero_reg__
+#define CC1  1
+#define CC2  16
+#define CC3  17
+
+#define AA0  26
+#define AA1  AA0+1
+#define AA2  30
+#define AA3  AA2+1
+
+#if defined (L_mulsa3)
+;;; (R25:R22)  *=  (R21:R18)
+;;; Clobbers: ABI, called by optabs
+;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
+DEFUN __mulsa3
+    push B0
+    push B1
+    bst B3, 7
+    XCALL __mulusa3
+    ;; A survived in 31:30:27:26
+    rcall 1f
+    pop AA1
+    pop AA0
+    bst AA3, 7
+1:  brtc 9f
+    ;; 1-extend A/B
+    sub C2, AA0
+    sbc C3, AA1
+9:  ret
+ENDF __mulsa3
+#endif  /* L_mulsa3 */
+
+#if defined (L_mulusa3)
+;;; (R25:R22)  *=  (R21:R18)
+;;; Clobbers: ABI, called by optabs and __mulsa3
+;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
+;;; Does not clobber T and A[] survives in 26, 27, 30, 31
+DEFUN __mulusa3
+    push CC2
+    push CC3
+    ; clear result
+    clr __tmp_reg__
+    wmov CC2, CC0
+    ; save multiplicand
+    wmov AA0, A0
+    wmov AA2, A2
+    rjmp 3f
+
+    ;; Loop the integral part
+
+1:  ;; CC += A * 2^n;  n >= 0
+    add CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
+
+2:  ;; A <<= 1
+    lsl A0      $  rol A1      $  rol A2      $  rol A3
+
+3:  ;; IBIT(B) >>= 1
+    ;; Carry = n-th bit of B;  n >= 0
+    lsr B3
+    ror B2
+    brcs 1b
+    sbci B3, 0
+    brne 2b
+
+    ;; Loop the fractional part
+    ;; B2/B3 is 0 now, use as guard bits for rounding
+    ;; Restore multiplicand
+    wmov A0, AA0
+    wmov A2, AA2
+    rjmp 5f
+
+4:  ;; CC += A:Guard * 2^n;  n < 0
+    add B3,B2  $  adc CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
+5:
+    ;; A:Guard >>= 1
+    lsr A3  $  ror A2  $  ror A1  $  ror A0  $  ror B2
+
+    ;; FBIT(B) <<= 1
+    ;; Carry = n-th bit of B;  n < 0
+    lsl B0
+    rol B1
+    brcs 4b
+    sbci B0, 0
+    brne 5b
+
+    ;; Move result into place and round
+    lsl B3
+    wmov C2, CC2
+    wmov C0, CC0
+    clr __zero_reg__
+    adc C0, __zero_reg__
+    adc C1, __zero_reg__
+    adc C2, __zero_reg__
+    adc C3, __zero_reg__
+
+    ;; Epilogue
+    pop CC3
+    pop CC2
+    ret
+ENDF __mulusa3
+#endif  /* L_mulusa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+/*******************************************************
+      Fractional Division 8 / 8
+*******************************************************/
+
+#define r_divd  r25     /* dividend */
+#define r_quo   r24     /* quotient */
+#define r_div   r22     /* divisor */
+
+#if defined (L_divqq3)
+DEFUN __divqq3
+    mov r0, r_divd
+    eor r0, r_div
+    sbrc r_div, 7
+    neg r_div
+    sbrc r_divd, 7
+    neg r_divd
+    cp r_divd, r_div
+    breq __divqq3_minus1        ; if equal return -1
+    XCALL __udivuqq3
+    lsr r_quo
+    sbrc r0, 7                  ; negate result if needed
+    neg r_quo
+    ret
+__divqq3_minus1:
+    ldi r_quo, 0x80
+    ret
+ENDF __divqq3
+#endif  /* defined (L_divqq3) */
+
+#if defined (L_udivuqq3)
+DEFUN __udivuqq3
+    clr r_quo                   ; clear quotient
+    inc __zero_reg__            ; init loop counter, used per shift
+__udivuqq3_loop:
+    lsl r_divd                  ; shift dividend
+    brcs 0f                     ; dividend overflow
+    cp r_divd,r_div             ; compare dividend & divisor
+    brcc 0f                     ; dividend >= divisor
+    rol r_quo                   ; shift quotient (with CARRY)
+    rjmp __udivuqq3_cont
+0:
+    sub r_divd,r_div            ; restore dividend
+    lsl r_quo                   ; shift quotient (without CARRY)
+__udivuqq3_cont:
+    lsl __zero_reg__            ; shift loop-counter bit
+    brne __udivuqq3_loop
+    com r_quo                   ; complement result
+                                ; because C flag was complemented in loop
+    ret
+ENDF __udivuqq3
+#endif  /* defined (L_udivuqq3) */
+
+#undef r_divd
+#undef r_quo
+#undef r_div
+
+
+/*******************************************************
+      Fractional Division 16 / 16
+*******************************************************/
+#define r_divdL 26      /* dividend Low */
+#define r_divdH 27      /* dividend High */
+#define r_quoL  24      /* quotient Low */
+#define r_quoH  25      /* quotient High */
+#define r_divL  22      /* divisor */
+#define r_divH  23      /* divisor */
+#define r_cnt   21
+
+#if defined (L_divhq3)
+DEFUN __divhq3
+    mov r0, r_divdH
+    eor r0, r_divH
+    sbrs r_divH, 7
+    rjmp 1f
+    NEG2 r_divL
+1:
+    sbrs r_divdH, 7
+    rjmp 2f
+    NEG2 r_divdL
+2:
+    cp r_divdL, r_divL
+    cpc r_divdH, r_divH
+    breq __divhq3_minus1        ; if equal return -1
+    XCALL __udivuhq3
+    lsr r_quoH
+    ror r_quoL
+    brpl 9f
+    ;; negate result if needed
+    NEG2 r_quoL
+9:
+    ret
+__divhq3_minus1:
+    ldi r_quoH, 0x80
+    clr r_quoL
+    ret
+ENDF __divhq3
+#endif  /* defined (L_divhq3) */
+
+#if defined (L_udivuhq3)
+DEFUN __udivuhq3
+    sub r_quoH,r_quoH           ; clear quotient and carry
+    ;; FALLTHRU
+ENDF __udivuhq3
+
+DEFUN __udivuha3_common
+    clr r_quoL                  ; clear quotient
+    ldi r_cnt,16                ; init loop counter
+__udivuhq3_loop:
+    rol r_divdL                 ; shift dividend (with CARRY)
+    rol r_divdH
+    brcs __udivuhq3_ep          ; dividend overflow
+    cp r_divdL,r_divL           ; compare dividend & divisor
+    cpc r_divdH,r_divH
+    brcc __udivuhq3_ep          ; dividend >= divisor
+    rol r_quoL                  ; shift quotient (with CARRY)
+    rjmp __udivuhq3_cont
+__udivuhq3_ep:
+    sub r_divdL,r_divL          ; restore dividend
+    sbc r_divdH,r_divH
+    lsl r_quoL                  ; shift quotient (without CARRY)
+__udivuhq3_cont:
+    rol r_quoH                  ; shift quotient
+    dec r_cnt                   ; decrement loop counter
+    brne __udivuhq3_loop
+    com r_quoL                  ; complement result
+    com r_quoH                  ; because C flag was complemented in loop
+    ret
+ENDF __udivuha3_common
+#endif  /* defined (L_udivuhq3) */
+
+/*******************************************************
+      Fixed Division 8.8 / 8.8
+*******************************************************/
+#if defined (L_divha3)
+DEFUN __divha3
+    mov r0, r_divdH
+    eor r0, r_divH
+    sbrs r_divH, 7
+    rjmp 1f
+    NEG2 r_divL
+1:
+    sbrs r_divdH, 7
+    rjmp 2f
+    NEG2 r_divdL
+2:
+    XCALL __udivuha3
+    sbrs r0, 7                  ; negate result if needed
+    ret
+    NEG2 r_quoL
+    ret
+ENDF __divha3
+#endif  /* defined (L_divha3) */
+
+#if defined (L_udivuha3)
+DEFUN __udivuha3
+    mov r_quoH, r_divdL
+    mov r_divdL, r_divdH
+    clr r_divdH
+    lsl r_quoH                  ; shift quotient into carry
+    XJMP __udivuha3_common      ; same as fractional after rearrange
+ENDF __udivuha3
+#endif  /* defined (L_udivuha3) */
+
+#undef r_divdL
+#undef r_divdH
+#undef r_quoL
+#undef r_quoH
+#undef r_divL
+#undef r_divH
+#undef r_cnt
+
+/*******************************************************
+      Fixed Division 16.16 / 16.16
+*******************************************************/
+
+#define r_arg1L  24    /* arg1 gets passed already in place */
+#define r_arg1H  25
+#define r_arg1HL 26
+#define r_arg1HH 27
+#define r_divdL  26    /* dividend Low */
+#define r_divdH  27
+#define r_divdHL 30
+#define r_divdHH 31    /* dividend High */
+#define r_quoL   22    /* quotient Low */
+#define r_quoH   23
+#define r_quoHL  24
+#define r_quoHH  25    /* quotient High */
+#define r_divL   18    /* divisor Low */
+#define r_divH   19
+#define r_divHL  20
+#define r_divHH  21    /* divisor High */
+#define r_cnt __zero_reg__  /* loop count (0 after the loop!) */
+
+#if defined (L_divsa3)
+DEFUN __divsa3
+    mov r0, r_arg1HH
+    eor r0, r_divHH
+    sbrs r_divHH, 7
+    rjmp 1f
+    NEG4 r_divL
+1:
+    sbrs r_arg1HH, 7
+    rjmp 2f
+    NEG4 r_arg1L
+2:
+    XCALL __udivusa3
+    sbrs r0, 7                  ; negate result if needed
+    ret
+    NEG4 r_quoL
+    ret
+ENDF __divsa3
+#endif  /* defined (L_divsa3) */
+
+#if defined (L_udivusa3)
+DEFUN __udivusa3
+    ldi r_divdHL, 32            ; init loop counter
+    mov r_cnt, r_divdHL
+    clr r_divdHL
+    clr r_divdHH
+    wmov r_quoL, r_divdHL
+    lsl r_quoHL                 ; shift quotient into carry
+    rol r_quoHH
+__udivusa3_loop:
+    rol r_divdL                 ; shift dividend (with CARRY)
+    rol r_divdH
+    rol r_divdHL
+    rol r_divdHH
+    brcs __udivusa3_ep          ; dividend overflow
+    cp r_divdL,r_divL           ; compare dividend & divisor
+    cpc r_divdH,r_divH
+    cpc r_divdHL,r_divHL
+    cpc r_divdHH,r_divHH
+    brcc __udivusa3_ep          ; dividend >= divisor
+    rol r_quoL                  ; shift quotient (with CARRY)
+    rjmp __udivusa3_cont
+__udivusa3_ep:
+    sub r_divdL,r_divL          ; restore dividend
+    sbc r_divdH,r_divH
+    sbc r_divdHL,r_divHL
+    sbc r_divdHH,r_divHH
+    lsl r_quoL                  ; shift quotient (without CARRY)
+__udivusa3_cont:
+    rol r_quoH                  ; shift quotient
+    rol r_quoHL
+    rol r_quoHH
+    dec r_cnt                   ; decrement loop counter
+    brne __udivusa3_loop
+    com r_quoL                  ; complement result
+    com r_quoH                  ; because C flag was complemented in loop
+    com r_quoHL
+    com r_quoHH
+    ret
+ENDF __udivusa3
+#endif  /* defined (L_udivusa3) */
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg1HL
+#undef r_arg1HH
+#undef r_divdL
+#undef r_divdH
+#undef r_divdHL
+#undef r_divdHH
+#undef r_quoL
+#undef r_quoH
+#undef r_quoHL
+#undef r_quoHH
+#undef r_divL
+#undef r_divH
+#undef r_divHL
+#undef r_divHH
+#undef r_cnt
diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 95a7d3d4eeb..6b9879ee7d7 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -91,6 +91,35 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 .endfunc
 .endm
 
+;; Negate a 2-byte value held in consecutive registers
+.macro NEG2 reg
+    com \reg+1
+    neg \reg
+    sbci \reg+1, -1
+.endm
+
+;; Negate a 4-byte value held in consecutive registers
+.macro NEG4 reg
+    com \reg+3
+    com \reg+2
+    com \reg+1
+.if \reg >= 16
+    neg \reg
+    sbci \reg+1, -1
+    sbci \reg+2, -1
+    sbci \reg+3, -1
+.else
+    com \reg
+    adc \reg,   __zero_reg__
+    adc \reg+1, __zero_reg__
+    adc \reg+2, __zero_reg__
+    adc \reg+3, __zero_reg__
+.endif
+.endm
+
+#define exp_lo(N)  hlo8 ((N) << 23)
+#define exp_hi(N)  hhi8 ((N) << 23)
+
 .section .text.libgcc.mul, "ax", @progbits
 
@@ -126,175 +155,246 @@ ENDF __mulqi3
 #endif /* defined (L_mulqi3) */
 
-#if defined (L_mulqihi3)
-DEFUN __mulqihi3
-	clr	r25
-	sbrc	r24, 7
-	dec	r25
-	clr	r23
-	sbrc	r22, 7
-	dec	r22
-	XJMP	__mulhi3
-ENDF __mulqihi3:
-#endif /* defined (L_mulqihi3) */
+
+/*******************************************************
+    Widening Multiplication 16 = 8 x 8 without MUL
+    Multiplication 16 x 16 without MUL
+*******************************************************/
+
+#define A0  r22
+#define A1  r23
+#define B0  r24
+#define BB0 r20
+#define B1  r25
+;; Output overlaps input, thus expand result in CC0/1
+#define C0  r24
+#define C1  r25
+#define CC0 __tmp_reg__
+#define CC1 R21
 
 #if defined (L_umulqihi3)
+;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
+;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
+;;; Clobbers: __tmp_reg__, R21..R23
 DEFUN __umulqihi3
-	clr	r25
-	clr	r23
-	XJMP	__mulhi3
+    clr A1
+    clr B1
+    XJMP __mulhi3
 ENDF __umulqihi3
-#endif /* defined (L_umulqihi3) */
+#endif /* L_umulqihi3 */
 
-/*******************************************************
-    Multiplication  16 x 16  without MUL
-*******************************************************/
-#if defined (L_mulhi3)
-#define	r_arg1L	r24		/* multiplier Low */
-#define	r_arg1H	r25		/* multiplier High */
-#define	r_arg2L	r22		/* multiplicand Low */
-#define	r_arg2H	r23		/* multiplicand High */
-#define r_resL	__tmp_reg__	/* result Low */
-#define r_resH  r21		/* result High */
+#if defined (L_mulqihi3)
+;;; R25:R24 = (signed int) R22 * (signed int) R24
+;;; (C1:C0) = (signed int) A0 * (signed int) B0
+;;; Clobbers: __tmp_reg__, R20..R23
+DEFUN __mulqihi3
+    ;; Sign-extend B0
+    clr B1
+    sbrc B0, 7
+    com B1
+    ;; The multiplication runs twice as fast if A1 is zero, thus:
+    ;; Zero-extend A0
+    clr A1
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; Store  B0 * sign of A
+    clr BB0
+    sbrc A0, 7
+    mov BB0, B0
+    call __mulhi3
+#else /* have no CALL */
+    ;; Skip sign-extension of A if A >= 0
+    ;; Same size as with the first alternative but avoids errata skip
+    ;; and is faster if A >= 0
+    sbrs A0, 7
+    rjmp __mulhi3
+    ;; If  A < 0  store B
+    mov BB0, B0
+    rcall __mulhi3
+#endif /* HAVE_JMP_CALL */
+    ;; 1-extend A after the multiplication
+    sub C1, BB0
+    ret
+ENDF __mulqihi3
+#endif /* L_mulqihi3 */
 
+#if defined (L_mulhi3)
+;;; R25:R24 = R23:R22 * R25:R24
+;;; (C1:C0) = (A1:A0) * (B1:B0)
+;;; Clobbers: __tmp_reg__, R21..R23
 DEFUN __mulhi3
-	clr	r_resH		; clear result
-	clr	r_resL		; clear result
-__mulhi3_loop:
-	sbrs	r_arg1L,0
-	rjmp	__mulhi3_skip1
-	add	r_resL,r_arg2L	; result + multiplicand
-	adc	r_resH,r_arg2H
-__mulhi3_skip1:
-	add	r_arg2L,r_arg2L	; shift multiplicand
-	adc	r_arg2H,r_arg2H
-
-	cp	r_arg2L,__zero_reg__
-	cpc	r_arg2H,__zero_reg__
-	breq	__mulhi3_exit	; while multiplicand != 0
-
-	lsr	r_arg1H		; gets LSB of multiplier
-	ror	r_arg1L
-	sbiw	r_arg1L,0
-	brne	__mulhi3_loop	; exit if multiplier = 0
-__mulhi3_exit:
-	mov	r_arg1H,r_resH	; result to return register
-	mov	r_arg1L,r_resL
-	ret
-ENDF __mulhi3
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg2L
-#undef r_arg2H
-#undef r_resL
-#undef r_resH
+    ;; Clear result
+    clr CC0
+    clr CC1
+    rjmp 3f
+1:
+    ;; Bit n of A is 1  -->  C += B << n
+    add CC0, B0
+    adc CC1, B1
+2:
+    lsl B0
+    rol B1
+3:
+    ;; If B == 0 we are ready
+    sbiw B0, 0
+    breq 9f
+
+    ;; Carry = n-th bit of A
+    lsr A1
+    ror A0
+    ;; If bit n of A is set, then go add  B * 2^n  to  C
+    brcs 1b
+
+    ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
+    ;; Thus, it is sufficient to CPC the high part to test A against 0
+    cpc A1, __zero_reg__
+    ;; Only proceed if A != 0
+    brne 2b
+9:
+    ;; Move Result into place
+    mov C0, CC0
+    mov C1, CC1
+    ret
+ENDF __mulhi3
+#endif /* L_mulhi3 */
 
-#endif /* defined (L_mulhi3) */
+#undef A0
+#undef A1
+#undef B0
+#undef BB0
+#undef B1
+#undef C0
+#undef C1
+#undef CC0
+#undef CC1
+
+
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define CC0 26
+#define CC1 CC0+1
+#define CC2 30
+#define CC3 CC2+1
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
 
 /*******************************************************
     Widening Multiplication 32 = 16 x 16 without MUL
 *******************************************************/
 
-#if defined (L_mulhisi3)
-DEFUN __mulhisi3
-;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	sbrc	r23, 7
-	dec	r24
-	mov	r25, r24
-	clr	r20
-	sbrc	r19, 7
-	dec	r20
-	mov	r21, r20
-	XJMP	__mulsi3
-ENDF __mulhisi3
-#endif /* defined (L_mulhisi3) */
-
 #if defined (L_umulhisi3)
 DEFUN __umulhisi3
-;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	clr	r25
-	mov_l	r20, r24
-	mov_h	r21, r25
+    wmov B0, 24
+    ;; Zero-extend B
+    clr B2
+    clr B3
+    ;; Zero-extend A
+    wmov A2, B2
     XJMP __mulsi3
 ENDF __umulhisi3
-#endif /* defined (L_umulhisi3) */
+#endif /* L_umulhisi3 */
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+    wmov B0, 24
+    ;; Sign-extend B
+    lsl r25
+    sbc B2, B2
+    mov B3, B2
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+    ;; Sign-extend A
+    clr A2
+    sbrc A1, 7
+    com A2
+    mov A3, A2
+    XJMP __mulsi3
+#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
+    ;; Zero-extend A and __mulsi3 will run at least twice as fast
+    ;; compared to a sign-extended A.
+    clr A2
+    clr A3
+    sbrs A1, 7
+    XJMP __mulsi3
+    ;; If  A < 0  then perform the  B * 0xffff....  before the
+    ;; very multiplication by initializing the high part of the
+    ;; result CC with -B.
+    wmov CC2, A2
+    sub CC2, B0
+    sbc CC3, B1
+    XJMP __mulsi3_helper
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
 
-#if defined (L_mulsi3)
 /*******************************************************
     Multiplication 32 x 32 without MUL
 *******************************************************/
 
-#define r_arg1L  r22		/* multiplier Low */
-#define r_arg1H  r23
-#define r_arg1HL r24
-#define r_arg1HH r25		/* multiplier High */
-
-#define r_arg2L  r18		/* multiplicand Low */
-#define r_arg2H  r19
-#define r_arg2HL r20
-#define r_arg2HH r21		/* multiplicand High */
-
-#define r_resL	 r26		/* result Low */
-#define r_resH   r27
-#define r_resHL  r30
-#define r_resHH  r31		/* result High */
-
+#if defined (L_mulsi3)
 DEFUN __mulsi3
-	clr	r_resHH		; clear result
-	clr	r_resHL		; clear result
-	clr	r_resH		; clear result
-	clr	r_resL		; clear result
-__mulsi3_loop:
-	sbrs	r_arg1L,0
-	rjmp	__mulsi3_skip1
-	add	r_resL,r_arg2L	; result + multiplicand
-	adc	r_resH,r_arg2H
-	adc	r_resHL,r_arg2HL
-	adc	r_resHH,r_arg2HH
-__mulsi3_skip1:
-	add	r_arg2L,r_arg2L	; shift multiplicand
-	adc	r_arg2H,r_arg2H
-	adc	r_arg2HL,r_arg2HL
-	adc	r_arg2HH,r_arg2HH
-
-	lsr	r_arg1HH	; gets LSB of multiplier
-	ror	r_arg1HL
-	ror	r_arg1H
-	ror	r_arg1L
-	brne	__mulsi3_loop
-	sbiw	r_arg1HL,0
-	cpc	r_arg1H,r_arg1L
-	brne	__mulsi3_loop	; exit if multiplier = 0
-__mulsi3_exit:
-	mov_h	r_arg1HH,r_resHH	; result to return register
-	mov_l	r_arg1HL,r_resHL
-	mov_h	r_arg1H,r_resH
-	mov_l	r_arg1L,r_resL
-	ret
-ENDF __mulsi3
+    ;; Clear result
+    clr CC2
+    clr CC3
+    ;; FALLTHRU
+ENDF __mulsi3
 
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg1HL
-#undef r_arg1HH
-
-#undef r_arg2L
-#undef r_arg2H
-#undef r_arg2HL
-#undef r_arg2HH
-
-#undef r_resL
-#undef r_resH
-#undef r_resHL
-#undef r_resHH
+DEFUN __mulsi3_helper
+    clr CC0
+    clr CC1
+    rjmp 3f
+
+1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
+    ;; CC += B
+    add CC0,B0  $  adc CC1,B1  $  adc CC2,B2  $  adc CC3,B3
+
+2:  ;; B <<= 1
+    lsl B0      $  rol B1      $  rol B2      $  rol B3
+
+3:  ;; A >>= 1:  Carry = n-th bit of A
+    lsr A3  $  ror A2  $  ror A1  $  ror A0
+
+    brcs 1b
+    ;; Only continue if  A != 0
+    sbci A1, 0
+    brne 2b
+    sbiw A2, 0
+    brne 2b
+
+    ;; All bits of A are consumed:  Copy result to return register C
+    wmov C0, CC0
+    wmov C2, CC2
+    ret
+ENDF __mulsi3_helper
+#endif /* L_mulsi3 */
 
-#endif /* defined (L_mulsi3) */
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
 
 #endif /* !defined (__AVR_HAVE_MUL__) */
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -316,7 +416,7 @@ ENDF __mulsi3
 #define C3  C0+3
 
 /*******************************************************
-    Widening Multiplication 32 = 16 x 16
+    Widening Multiplication 32 = 16 x 16 with MUL
 *******************************************************/
 
 #if defined (L_mulhisi3)
@@ -364,7 +464,17 @@ DEFUN __umulhisi3
     mul A1, B1
     movw C2, r0
     mul A0, B1
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; This function is used by many other routines, often multiple times.
+    ;; Therefore, if the flash size is not too limited, avoid the RCALL
+    ;; and invest 6 bytes to speed things up.
+    add C1, r0
+    adc C2, r1
+    clr __zero_reg__
+    adc C3, __zero_reg__
+#else
     rcall 1f
+#endif
     mul A1, B0
 1:  add C1, r0
     adc C2, r1
@@ -375,7 +485,7 @@ ENDF __umulhisi3
 #endif /* L_umulhisi3 */
 
 /*******************************************************
-    Widening Multiplication 32 = 16 x 32
+    Widening Multiplication 32 = 16 x 32 with MUL
 *******************************************************/
 
 #if defined (L_mulshisi3)
@@ -425,7 +535,7 @@ ENDF __muluhisi3
 #endif /* L_muluhisi3 */
 
 /*******************************************************
-    Multiplication 32 x 32
+    Multiplication 32 x 32 with MUL
 *******************************************************/
 
 #if defined (L_mulsi3)
@@ -468,7 +578,7 @@ ENDF __mulsi3
 #endif /* __AVR_HAVE_MUL__ */
 
 /*******************************************************
-    Multiplication 24 x 24
+    Multiplication 24 x 24 with MUL
 *******************************************************/
 
 #if defined (L_mulpsi3)
@@ -1247,6 +1357,19 @@ __divmodsi4_exit:
 ENDF __divmodsi4
 #endif /* defined (L_divmodsi4) */
 
+#undef r_remHH
+#undef r_remHL
+#undef r_remH
+#undef r_remL
+#undef r_arg1HH
+#undef r_arg1HL
+#undef r_arg1H
+#undef r_arg1L
+#undef r_arg2HH
+#undef r_arg2HL
+#undef r_arg2H
+#undef r_arg2L
+#undef r_cnt
 
 /*******************************************************
        Division 64 / 64
@@ -2757,9 +2880,7 @@ DEFUN __fmulsu_exit
     XJMP  __fmul
 1:  XCALL __fmul
     ;; C = -C iff A0.7 = 1
-    com C1
-    neg C0
-    sbci C1, -1
+    NEG2 C0
     ret
 ENDF __fmulsu_exit
 #endif /* L_fmulsu */
@@ -2794,3 +2915,5 @@ ENDF __fmul
 #undef B1
 #undef C0
 #undef C1
+
+#include "lib1funcs-fixed.S"
diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr
index 43caa94ca2a..6f783cd9d52 100644
--- a/libgcc/config/avr/t-avr
+++ b/libgcc/config/avr/t-avr
@@ -2,6 +2,7 @@ LIB1ASMSRC = avr/lib1funcs.S
 LIB1ASMFUNCS = \
 	_mulqi3 \
 	_mulhi3 \
+	_mulqihi3 _umulqihi3 \
 	_mulpsi3 _mulsqipsi3 \
 	_mulhisi3 \
 	_umulhisi3 \
@@ -55,6 +56,24 @@ LIB1ASMFUNCS = \
 	_cmpdi2 _cmpdi2_s8 \
 	_fmul _fmuls _fmulsu
 
+# Fixed point routines in avr/lib1funcs-fixed.S
+LIB1ASMFUNCS += \
+	_fractqqsf _fractuqqsf \
+	_fracthqsf _fractuhqsf _fracthasf _fractuhasf \
+	_fractsasf _fractusasf _fractsqsf _fractusqsf \
+	\
+	_fractsfqq _fractsfuqq \
+	_fractsfhq _fractsfuhq _fractsfha _fractsfuha \
+	_fractsfsa _fractsfusa \
+	_mulqq3 \
+	_mulhq3 _muluhq3 \
+	_mulha3 _muluha3 _muluha3_round \
+	_mulsa3 _mulusa3 \
+	_divqq3 _udivuqq3 \
+	_divhq3 _udivuhq3 \
+	_divha3 _udivuha3 \
+	_divsa3 _udivusa3
+
 LIB2FUNCS_EXCLUDE = \
 	_moddi3 _umoddi3 \
 	_clz
@@ -81,3 +100,49 @@ libgcc-objects += $(patsubst %,%$(objext),$(hiintfuncs16))
 ifeq ($(enable_shared),yes)
 libgcc-s-objects += $(patsubst %,%_s$(objext),$(hiintfuncs16))
 endif
+
+
+# Filter out supported conversions from fixed-bit.c
+
+conv_XY=$(conv)$(mode1)$(mode2)
+conv_X=$(conv)$(mode)
+
+# Conversions supported by the compiler
+
+convf_modes =	 QI UQI QQ UQQ \
+		 HI UHI HQ UHQ HA UHA \
+		 SI USI SQ USQ SA USA \
+		 DI UDI DQ UDQ DA UDA \
+		 TI UTI TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_fract _fractuns,\
+		$(foreach mode1,$(convf_modes),\
+			$(foreach mode2,$(convf_modes),$(conv_XY))))
+
+# Conversions supported by lib1funcs-fixed.S
+
+conv_to_sf_modes   = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA
+conv_from_sf_modes = QQ UQQ HQ UHQ HA UHA        SA USA
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_fract, \
+		$(foreach mode1,$(conv_to_sf_modes), \
+			$(foreach mode2,SF,$(conv_XY))))
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_fract,\
+		$(foreach mode1,SF,\
+			$(foreach mode2,$(conv_from_sf_modes),$(conv_XY))))
+
+# Arithmetic supported by the compiler
+
+allfix_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA DA UDA DQ UDQ TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_add _sub,\
+		$(foreach mode,$(allfix_modes),$(conv_X)3))
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_lshr _ashl _ashr _cmp,\
+		$(foreach mode,$(allfix_modes),$(conv_X)))
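[Editorial illustration of the Make machinery above, not part of the patch: conv_XY simply concatenates $(conv)$(mode1)$(mode2), so conv=_fract, mode1=QQ, mode2=SF expands to _fractQQSF; every name produced this way is dropped from the generic fixed-bit.c build because either the compiler or lib1funcs-fixed.S now provides the operation. A hedged C sketch of a conversion that would consequently bind to the hand-written assembly rather than the excluded generic code, assuming _Fract maps to HQ mode:]

    /* Sketch only; assumes _Fract is HQ mode, so the cast below binds to
       the hand-written __fracthqsf instead of generic fixed-bit.c code. */
    float fract_to_float(_Fract x)
    {
        return (float) x;
    }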