author    | gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-24 12:42:48 +0000
committer | gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-24 12:42:48 +0000
commit    | 017c5b989ac3e3c65e0af360accad2f3ef281fad (patch)
tree      | 9180eac44e1ace2c0794f565cff49b3847b05c2b /libgcc
parent    | 5d34a30f668dc6540591e50a58fdddd143842f62 (diff)
download  | gcc-017c5b989ac3e3c65e0af360accad2f3ef281fad.tar.gz
libgcc/
PR target/54222
* config/avr/lib1funcs-fixed.S: New file.
* config/avr/lib1funcs.S: Include it. Undefine some divmodsi
after they are used.
(neg2, neg4): New macros.
(__mulqihi3,__umulqihi3,__mulhi3): Rewrite non-MUL variants.
(__mulhisi3,__umulhisi3,__mulsi3): Rewrite non-MUL variants.
(__umulhisi3): Speed up MUL variant if there is enough flash.
* config/avr/avr-lib.h (TA, UTA): Adjust according to gcc's
avr-modes.def.
* config/avr/t-avr (LIB1ASMFUNCS): Add: _fractqqsf, _fractuqqsf,
_fracthqsf, _fractuhqsf, _fracthasf, _fractuhasf, _fractsasf,
_fractusasf, _fractsfqq, _fractsfuqq, _fractsfhq, _fractsfuhq,
_fractsfha, _fractsfsa, _mulqq3, _muluqq3, _mulhq3, _muluhq3,
_mulha3, _muluha3, _mulsa3, _mulusa3, _divqq3, _udivuqq3, _divhq3,
_udivuhq3, _divha3, _udivuha3, _divsa3, _udivusa3.
(LIB2FUNCS_EXCLUDE): Add supported functions.
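[For illustration only, not part of the commit: the routines listed above implement ISO/IEC TR 18037 fixed-point arithmetic. A minimal C sketch of code that would link against them when built with a fixed-point-enabled avr-gcc; it assumes the usual keyword-to-mode mapping (_Accum to SA, s16.15; _Fract to HQ, s.15), and the function names are ours, not the patch's:]

    /* Sketch only; assumes avr-gcc maps _Accum to SA (s16.15) and
       _Fract to HQ (s.15).  Names below are illustrative.  */

    _Accum scale(_Accum x, _Accum gain)
    {
        return x * gain;        /* expected to lower to __mulsa3 */
    }

    float accum_to_float(_Accum x)
    {
        return (float) x;       /* expected to lower to __fractsasf */
    }

    _Fract float_to_fract(float f)
    {
        return (_Fract) f;      /* expected to lower to __fractsfhq */
    }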
gcc/
PR target/54222
* avr-modes.def (HA, SA, DA, TA, UTA): Adjust modes.
* avr/avr-fixed.md: New file.
* avr/avr.md: Include it.
(cc): Add: minus.
(adjust_len): Add: minus, minus64, ufract, sfract.
(ALL1, ALL2, ALL4, ORDERED234): New mode iterators.
(MOVMODE): Add: QQ, UQQ, HQ, UHQ, HA, UHA, SQ, USQ, SA, USA.
(MPUSH): Add: HQ, UHQ, HA, UHA, SQ, USQ, SA, USA.
(pushqi1, xload8_A, xload_8, movqi_insn, *reload_inqi, addqi3,
subqi3, ashlqi3, *ashlqi3, ashrqi3, lshrqi3, *lshrqi3, *cmpqi,
cbranchqi4, *cpse.eq): Generalize to handle all 8-bit modes in ALL1.
(*movhi, reload_inhi, addhi3, *addhi3, addhi3_clobber, subhi3,
ashlhi3, *ashlhi3_const, ashrhi3, *ashrhi3_const, lshrhi3,
*lshrhi3_const, *cmphi, cbranchhi4): Generalize to handle all
16-bit modes in ALL2.
(subhi3, casesi, strlenhi): Add clobber when expanding minus:HI.
(*movsi, *reload_insi, addsi3, subsi3, ashlsi3, *ashlsi3_const,
ashrsi3, *ashrhi3_const, *ashrsi3_const, lshrsi3, *lshrsi3_const,
*reversed_tstsi, *cmpsi, cbranchsi4): Generalize to handle all
32-bit modes in ALL4.
* avr-dimode.md (ALL8): New mode iterator.
(adddi3, adddi3_insn, adddi3_const_insn, subdi3, subdi3_insn,
subdi3_const_insn, cbranchdi4, compare_di2,
compare_const_di2, ashrdi3, lshrdi3, rotldi3, ashldi3_insn,
ashrdi3_insn, lshrdi3_insn, rotldi3_insn): Generalize to handle
all 64-bit modes in ALL8.
* config/avr/avr-protos.h (avr_to_int_mode): New prototype.
(avr_out_fract, avr_out_minus, avr_out_minus64): New prototypes.
* config/avr/avr.c (TARGET_FIXED_POINT_SUPPORTED_P): Define to...
(avr_fixed_point_supported_p): ...this new static function.
(TARGET_BUILD_BUILTIN_VA_LIST): Define to...
(avr_build_builtin_va_list): ...this new static function.
(avr_adjust_type_node): New static function.
(avr_scalar_mode_supported_p): Allow if ALL_FIXED_POINT_MODE_P.
(avr_builtin_setjmp_frame_value): Use gen_subhi3 and return new
pseudo instead of gen_rtx_MINUS.
(avr_print_operand, avr_operand_rtx_cost): Handle: CONST_FIXED.
(notice_update_cc): Handle: CC_MINUS.
(output_movqi): Generalize to handle respective fixed-point modes.
(output_movhi, output_movsisf, avr_2word_insn_p): Ditto.
(avr_out_compare, avr_out_plus_1): Also handle fixed-point modes.
(avr_assemble_integer): Ditto.
(output_reload_in_const, output_reload_insisf): Ditto.
(avr_compare_pattern): Skip all modes > 4 bytes.
(avr_2word_insn_p): Skip movuqq_insn, movqq_insn.
(avr_out_fract, avr_out_minus, avr_out_minus64): New functions.
(avr_to_int_mode): New function.
(adjust_insn_length): Handle: ADJUST_LEN_SFRACT,
ADJUST_LEN_UFRACT, ADJUST_LEN_MINUS, ADJUST_LEN_MINUS64.
* config/avr/predicates.md (const0_operand): Allow const_fixed.
(const_operand, const_or_immediate_operand): New.
(nonmemory_or_const_operand): New.
* config/avr/constraints.md (Ynn, Y00, Y01, Y02, Ym1, Ym2, YIJ):
New constraints.
* config/avr/avr.h (LONG_LONG_ACCUM_TYPE_SIZE): Define.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@190644 138bc75d-0d04-0410-961f-82ee72b054a4
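[Again for illustration, not part of the commit: on the gcc/ side, the ALL8 iterator in avr-dimode.md lets the existing 64-bit add/sub patterns serve the 8-byte fixed-point modes. A hedged C sketch, assuming long long _Accum maps to TA mode (8 bytes per the adjusted avr-modes.def):]

    /* Sketch only; assumes long long _Accum is TA mode (8 bytes) on AVR. */

    long long _Accum integrate(long long _Accum acc, long long _Accum sample)
    {
        /* Addition in an 8-byte fixed-point mode can reuse the adddi3
           insns now generalized over the ALL8 mode iterator.  */
        return acc + sample;
    }

    long long _Accum drain(long long _Accum acc, long long _Accum loss)
    {
        /* Subtraction likewise goes through the generalized subdi3.  */
        return acc - loss;
    }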
Diffstat (limited to 'libgcc')
-rw-r--r-- | libgcc/ChangeLog                    |  20
-rw-r--r-- | libgcc/config/avr/avr-lib.h         |  76
-rw-r--r-- | libgcc/config/avr/lib1funcs-fixed.S | 874
-rw-r--r-- | libgcc/config/avr/lib1funcs.S       | 423
-rw-r--r-- | libgcc/config/avr/t-avr             |  65
5 files changed, 1308 insertions, 150 deletions
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 33ad5185479..60f19491d76 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,23 @@
+2012-08-24  Georg-Johann Lay  <avr@gjlay.de>
+
+	PR target/54222
+	* config/avr/lib1funcs-fixed.S: New file.
+	* config/avr/lib1funcs.S: Include it. Undefine some divmodsi
+	after they are used.
+	(neg2, neg4): New macros.
+	(__mulqihi3,__umulqihi3,__mulhi3): Rewrite non-MUL variants.
+	(__mulhisi3,__umulhisi3,__mulsi3): Rewrite non-MUL variants.
+	(__umulhisi3): Speed up MUL variant if there is enough flash.
+	* config/avr/avr-lib.h (TA, UTA): Adjust according to gcc's
+	avr-modes.def.
+	* config/avr/t-avr (LIB1ASMFUNCS): Add: _fractqqsf, _fractuqqsf,
+	_fracthqsf, _fractuhqsf, _fracthasf, _fractuhasf, _fractsasf,
+	_fractusasf, _fractsfqq, _fractsfuqq, _fractsfhq, _fractsfuhq,
+	_fractsfha, _fractsfsa, _mulqq3, _muluqq3, _mulhq3, _muluhq3,
+	_mulha3, _muluha3, _mulsa3, _mulusa3, _divqq3, _udivuqq3, _divhq3,
+	_udivuhq3, _divha3, _udivuha3, _divsa3, _udivusa3.
+	(LIB2FUNCS_EXCLUDE): Add supported functions.
+
 2012-08-22  Georg-Johann Lay  <avr@gjlay.de>
 
 	* Makefile.in (fixed-funcs,fixed-conv-funcs): filter-out
diff --git a/libgcc/config/avr/avr-lib.h b/libgcc/config/avr/avr-lib.h
index daca4d81f9a..66082eb8a48 100644
--- a/libgcc/config/avr/avr-lib.h
+++ b/libgcc/config/avr/avr-lib.h
@@ -4,3 +4,79 @@
 #define DI SI
 typedef int QItype __attribute__ ((mode (QI)));
 #endif
+
+/* fixed-bit.h does not define functions for TA and UTA because
+   that part is wrapped in #if MIN_UNITS_PER_WORD > 4.
+   This would lead to empty functions for TA and UTA.
+   Thus, supply appropriate defines as if HAVE_[U]TA == 1.
+   #define HAVE_[U]TA 1 won't work because avr-modes.def
+   uses ADJUST_BYTESIZE(TA,8) and fixed-bit.h is not generic enough
+   to arrange for such changes of the mode size.  */
+
+typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA)));
+
+#if defined (UTA_MODE)
+#define FIXED_SIZE      8       /* in bytes */
+#define INT_C_TYPE      UDItype
+#define UINT_C_TYPE     UDItype
+#define HINT_C_TYPE     USItype
+#define HUINT_C_TYPE    USItype
+#define MODE_NAME       UTA
+#define MODE_NAME_S     uta
+#define MODE_UNSIGNED   1
+#endif
+
+#if defined (FROM_UTA)
+#define FROM_TYPE               4       /* ID for fixed-point */
+#define FROM_MODE_NAME          UTA
+#define FROM_MODE_NAME_S        uta
+#define FROM_INT_C_TYPE         UDItype
+#define FROM_SINT_C_TYPE        DItype
+#define FROM_UINT_C_TYPE        UDItype
+#define FROM_MODE_UNSIGNED      1
+#define FROM_FIXED_SIZE         8       /* in bytes */
+#elif defined (TO_UTA)
+#define TO_TYPE                 4       /* ID for fixed-point */
+#define TO_MODE_NAME            UTA
+#define TO_MODE_NAME_S          uta
+#define TO_INT_C_TYPE           UDItype
+#define TO_SINT_C_TYPE          DItype
+#define TO_UINT_C_TYPE          UDItype
+#define TO_MODE_UNSIGNED        1
+#define TO_FIXED_SIZE           8       /* in bytes */
+#endif
+
+/* Same for TAmode */
+
+typedef _Fract TAtype __attribute__ ((mode (TA)));
+
+#if defined (TA_MODE)
+#define FIXED_SIZE      8       /* in bytes */
+#define INT_C_TYPE      DItype
+#define UINT_C_TYPE     UDItype
+#define HINT_C_TYPE     SItype
+#define HUINT_C_TYPE    USItype
+#define MODE_NAME       TA
+#define MODE_NAME_S     ta
+#define MODE_UNSIGNED   0
+#endif
+
+#if defined (FROM_TA)
+#define FROM_TYPE               4       /* ID for fixed-point */
+#define FROM_MODE_NAME          TA
+#define FROM_MODE_NAME_S        ta
+#define FROM_INT_C_TYPE         DItype
+#define FROM_SINT_C_TYPE        DItype
+#define FROM_UINT_C_TYPE        UDItype
+#define FROM_MODE_UNSIGNED      0
+#define FROM_FIXED_SIZE         8       /* in bytes */
+#elif defined (TO_TA)
+#define TO_TYPE                 4       /* ID for fixed-point */
+#define TO_MODE_NAME            TA
+#define TO_MODE_NAME_S          ta
+#define TO_INT_C_TYPE           DItype
+#define TO_SINT_C_TYPE          DItype
+#define TO_UINT_C_TYPE          UDItype
+#define TO_MODE_UNSIGNED        0
+#define TO_FIXED_SIZE           8       /* in bytes */
+#endif
diff --git a/libgcc/config/avr/lib1funcs-fixed.S b/libgcc/config/avr/lib1funcs-fixed.S
new file mode 100644
index 00000000000..c1aff53d5fd
--- /dev/null
+++ b/libgcc/config/avr/lib1funcs-fixed.S
@@ -0,0 +1,874 @@
+/*   -*- Mode: Asm -*-  */
+;;    Copyright (C) 2012
+;;    Free Software Foundation, Inc.
+;;    Contributed by Sean D'Epagnier  (sean@depagnier.com)
+;;                   Georg-Johann Lay (avr@gjlay.de)
+
+;; This file is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by the
+;; Free Software Foundation; either version 3, or (at your option) any
+;; later version.
+
+;; In addition to the permissions in the GNU General Public License, the
+;; Free Software Foundation gives you unlimited permission to link the
+;; compiled version of this file into combinations with other programs,
+;; and to distribute those combinations without any restriction coming
+;; from the use of this file.  (The General Public License restrictions
+;; do apply in other respects; for example, they cover modification of
+;; the file, and distribution when not linked into a combine
+;; executable.)
+
+;; This file is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Fixed point library routines for AVR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.section .text.libgcc.fixed, "ax", @progbits
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions to float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractqqsf)
+DEFUN __fractqqsf
+    ;; Move in place for SA -> SF conversion
+    clr r22
+    mov r23, r24
+    lsl r23
+    ;; Sign-extend
+    sbc r24, r24
+    mov r25, r24
+    XJMP __fractsasf
+ENDF __fractqqsf
+#endif  /* L_fractqqsf */
+
+#if defined (L_fractuqqsf)
+DEFUN __fractuqqsf
+    ;; Move in place for USA -> SF conversion
+    clr r22
+    mov r23, r24
+    ;; Zero-extend
+    clr r24
+    clr r25
+    XJMP __fractusasf
+ENDF __fractuqqsf
+#endif  /* L_fractuqqsf */
+
+#if defined (L_fracthqsf)
+DEFUN __fracthqsf
+    ;; Move in place for SA -> SF conversion
+    wmov 22, 24
+    lsl r22
+    rol r23
+    ;; Sign-extend
+    sbc r24, r24
+    mov r25, r24
+    XJMP __fractsasf
+ENDF __fracthqsf
+#endif  /* L_fracthqsf */
+
+#if defined (L_fractuhqsf)
+DEFUN __fractuhqsf
+    ;; Move in place for USA -> SF conversion
+    wmov 22, 24
+    ;; Zero-extend
+    clr r24
+    clr r25
+    XJMP __fractusasf
+ENDF __fractuhqsf
+#endif  /* L_fractuhqsf */
+
+#if defined (L_fracthasf)
+DEFUN __fracthasf
+    ;; Move in place for SA -> SF conversion
+    clr r22
+    mov r23, r24
+    mov r24, r25
+    ;; Sign-extend
+    lsl r25
+    sbc r25, r25
+    XJMP __fractsasf
+ENDF __fracthasf
+#endif  /* L_fracthasf */
+
+#if defined (L_fractuhasf)
+DEFUN __fractuhasf
+    ;; Move in place for USA -> SF conversion
+    clr r22
+    mov r23, r24
+    mov r24, r25
+    ;; Zero-extend
+    clr r25
+    XJMP __fractusasf
+ENDF __fractuhasf
+#endif  /* L_fractuhasf */
+
+
+#if defined (L_fractsqsf)
+DEFUN __fractsqsf
+    XCALL __floatsisf
+    ;; Divide non-zero results by 2^31 to move the
+    ;; decimal point into place
+    tst r25
+    breq 0f
+    subi r24, exp_lo (31)
+    sbci r25, exp_hi (31)
+0:  ret
+ENDF __fractsqsf
+#endif  /* L_fractsqsf */
+
+#if defined (L_fractusqsf)
+DEFUN __fractusqsf
+    XCALL __floatunsisf
+    ;; Divide non-zero results by 2^32 to move the
+    ;; decimal point into place
+    cpse r25, __zero_reg__
+    subi r25, exp_hi (32)
+    ret
+ENDF __fractusqsf
+#endif  /* L_fractusqsf */
+
+#if defined (L_fractsasf)
+DEFUN __fractsasf
+    XCALL __floatsisf
+    ;; Divide non-zero results by 2^16 to move the
+    ;; decimal point into place
+    cpse r25, __zero_reg__
+    subi r25, exp_hi (16)
+    ret
+ENDF __fractsasf
+#endif  /* L_fractsasf */
+
+#if defined (L_fractusasf)
+DEFUN __fractusasf
+    XCALL __floatunsisf
+    ;; Divide non-zero results by 2^16 to move the
+    ;; decimal point into place
+    cpse r25, __zero_reg__
+    subi r25, exp_hi (16)
+    ret
+ENDF __fractusasf
+#endif  /* L_fractusasf */
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Conversions from float
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+#if defined (L_fractsfqq)
+DEFUN __fractsfqq
+    ;; Multiply with 2^{24+7} to get a QQ result in r25
+    subi r24, exp_lo (-31)
+    sbci r25, exp_hi (-31)
+    XCALL __fixsfsi
+    mov r24, r25
+    ret
+ENDF __fractsfqq
+#endif  /* L_fractsfqq */
+
+#if defined (L_fractsfuqq)
+DEFUN __fractsfuqq
+    ;; Multiply with 2^{24+8} to get a UQQ result in r25
+    subi r25, exp_hi (-32)
+    XCALL __fixunssfsi
+    mov r24, r25
+    ret
+ENDF __fractsfuqq
+#endif  /* L_fractsfuqq */
+
+#if defined (L_fractsfha)
+DEFUN __fractsfha
+    ;; Multiply with 2^24 to get a HA result in r25:r24
+    subi r25, exp_hi (-24)
+    XJMP __fixsfsi
+ENDF __fractsfha
+#endif  /* L_fractsfha */
+
+#if defined (L_fractsfuha)
+DEFUN __fractsfuha
+    ;; Multiply with 2^24 to get a UHA result in r25:r24
+    subi r25, exp_hi (-24)
+    XJMP __fixunssfsi
+ENDF __fractsfuha
+#endif  /* L_fractsfuha */
+
+#if defined (L_fractsfhq)
+DEFUN __fractsfsq
+ENDF __fractsfsq
+
+DEFUN __fractsfhq
+    ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
+    ;; resp. with 2^31 to get a SQ result in r25:r22
+    subi r24, exp_lo (-31)
+    sbci r25, exp_hi (-31)
+    XJMP __fixsfsi
+ENDF __fractsfhq
+#endif  /* L_fractsfhq */
+
+#if defined (L_fractsfuhq)
+DEFUN __fractsfusq
+ENDF __fractsfusq
+
+DEFUN __fractsfuhq
+    ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
+    ;; resp. with 2^32 to get a USQ result in r25:r22
+    subi r25, exp_hi (-32)
+    XJMP __fixunssfsi
+ENDF __fractsfuhq
+#endif  /* L_fractsfuhq */
+
+#if defined (L_fractsfsa)
+DEFUN __fractsfsa
+    ;; Multiply with 2^16 to get a SA result in r25:r22
+    subi r25, exp_hi (-16)
+    XJMP __fixsfsi
+ENDF __fractsfsa
+#endif  /* L_fractsfsa */
+
+#if defined (L_fractsfusa)
+DEFUN __fractsfusa
+    ;; Multiply with 2^16 to get a USA result in r25:r22
+    subi r25, exp_hi (-16)
+    XJMP __fixunssfsi
+ENDF __fractsfusa
+#endif  /* L_fractsfusa */
+
+
+;; For multiplication the functions here are called directly from
+;; avr-fixed.md instead of using the standard libcall mechanisms.
+;; This can make better code because GCC knows exactly which
+;; of the call-used registers (not all of them) are clobbered.  */
+
+/*******************************************************
+    Fractional Multiplication  8 x 8  without MUL
+*******************************************************/
+
+#if defined (L_mulqq3) && !defined (__AVR_HAVE_MUL__)
+;;; R23 = R24 * R25
+;;; Clobbers: __tmp_reg__, R22, R24, R25
+;;; Rounding: ???
+DEFUN __mulqq3
+    XCALL __fmuls
+    ;; TR 18037 requires that  (-1) * (-1)  does not overflow
+    ;; The only input that can produce  -1  is  (-1)^2.
+    dec r23
+    brvs 0f
+    inc r23
+0:  ret
+ENDF __mulqq3
+#endif  /* L_mulqq3 && ! HAVE_MUL */
+
+/*******************************************************
+    Fractional Multiply  .16 x .16  with and without MUL
+*******************************************************/
+
+#if defined (L_mulhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
+DEFUN __mulhq3
+    XCALL __mulhisi3
+    ;; Shift result into place
+    lsl r23
+    rol r24
+    rol r25
+    brvs 1f
+    ;; Round
+    sbrc r23, 7
+    adiw r24, 1
+    ret
+1:  ;; Overflow.  TR 18037 requires  (-1)^2  not to overflow
+    ldi r24, lo8 (0x7fff)
+    ldi r25, hi8 (0x7fff)
+    ret
+ENDF __mulhq3
+#endif  /* defined (L_mulhq3) */
+
+#if defined (L_muluhq3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
+DEFUN __muluhq3
+    XCALL __umulhisi3
+    ;; Round
+    sbrc r23, 7
+    adiw r24, 1
+    ret
+ENDF __muluhq3
+#endif  /* L_muluhq3 */
+
+
+/*******************************************************
+    Fixed Multiply  8.8 x 8.8  with and without MUL
+*******************************************************/
+
+#if defined (L_mulha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) = (R22:R23) * (R24:R25)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
+DEFUN __mulha3
+    XCALL __mulhisi3
+    XJMP __muluha3_round
+ENDF __mulha3
+#endif  /* L_mulha3 */
+
+#if defined (L_muluha3)
+;;; Same code with and without MUL, but the interfaces differ:
+;;; no MUL: (R25:R24) *= (R23:R22)
+;;;         Clobbers: ABI, called by optabs
+;;; MUL:    (R25:R24) = (R19:R18) * (R27:R26)
+;;;         Clobbers: __tmp_reg__, R22, R23
+;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
+DEFUN __muluha3
+    XCALL __umulhisi3
+    XJMP __muluha3_round
+ENDF __muluha3
+#endif  /* L_muluha3 */
+
+#if defined (L_muluha3_round)
+DEFUN __muluha3_round
+    ;; Shift result into place
+    mov r25, r24
+    mov r24, r23
+    ;; Round
+    sbrc r22, 7
+    adiw r24, 1
+    ret
+ENDF __muluha3_round
+#endif  /* L_muluha3_round */
+
+
+/*******************************************************
+    Fixed Multiplication  16.16 x 16.16
+*******************************************************/
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; Multiplier
+#define A0  16
+#define A1  A0+1
+#define A2  A1+1
+#define A3  A2+1
+
+;; Multiplicand
+#define B0  20
+#define B1  B0+1
+#define B2  B1+1
+#define B3  B2+1
+
+;; Result
+#define C0  24
+#define C1  C0+1
+#define C2  C1+1
+#define C3  C2+1
+
+#if defined (L_mulusa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__
+;;; Rounding:  -0.5 LSB  <  error  <=  0.5 LSB
+DEFUN __mulusa3
+    ;; Some of the MUL instructions have LSBs outside the result.
+    ;; Don't ignore these LSBs in order to tame rounding error.
+    ;; Use C2/C3 for these LSBs.
+
+    clr C0
+    clr C1
+    mul A0, B0  $  movw C2, r0
+
+    mul A1, B0  $  add C3, r0  $  adc C0, r1
+    mul A0, B1  $  add C3, r0  $  adc C0, r1  $  rol C1
+
+    ;; Round
+    sbrc C3, 7
+    adiw C0, 1
+
+    ;; The following MULs don't have LSBs outside the result.
+    ;; C2/C3 is the high part.
+
+    mul A0, B2  $  add C0, r0  $  adc C1, r1  $  sbc  C2, C2
+    mul A1, B1  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
+    mul A2, B0  $  add C0, r0  $  adc C1, r1  $  sbci C2, 0
+    neg C2
+
+    mul A0, B3  $  add C1, r0  $  adc C2, r1  $  sbc  C3, C3
+    mul A1, B2  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
+    mul A2, B1  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
+    mul A3, B0  $  add C1, r0  $  adc C2, r1  $  sbci C3, 0
+    neg C3
+
+    mul A1, B3  $  add C2, r0  $  adc C3, r1
+    mul A2, B2  $  add C2, r0  $  adc C3, r1
+    mul A3, B1  $  add C2, r0  $  adc C3, r1
+
+    mul A2, B3  $  add C3, r0
+    mul A3, B2  $  add C3, r0
+
+    clr __zero_reg__
+    ret
+ENDF __mulusa3
+#endif /* L_mulusa3 */
+
+#if defined (L_mulsa3)
+;;; (C3:C0) = (A3:A0) * (B3:B0)
+;;; Clobbers: __tmp_reg__
+;;; Rounding:  -0.5 LSB  <= error  <=  0.5 LSB
+DEFUN __mulsa3
+    XCALL __mulusa3
+    tst B3
+    brpl 1f
+    sub C2, A0
+    sbc C3, A1
+1:  sbrs A3, 7
+    ret
+    sub C2, B0
+    sbc C3, B1
+    ret
+ENDF __mulsa3
+#endif /* L_mulsa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#else /* __AVR_HAVE_MUL__ */
+
+#define A0  18
+#define A1  A0+1
+#define A2  A0+2
+#define A3  A0+3
+
+#define B0  22
+#define B1  B0+1
+#define B2  B0+2
+#define B3  B0+3
+
+#define C0  22
+#define C1  C0+1
+#define C2  C0+2
+#define C3  C0+3
+
+;; __tmp_reg__
+#define CC0  0
+;; __zero_reg__
+#define CC1  1
+#define CC2  16
+#define CC3  17
+
+#define AA0  26
+#define AA1  AA0+1
+#define AA2  30
+#define AA3  AA2+1
+
+#if defined (L_mulsa3)
+;;; (R25:R22)  *=  (R21:R18)
+;;; Clobbers: ABI, called by optabs
+;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
+DEFUN __mulsa3
+    push B0
+    push B1
+    bst B3, 7
+    XCALL __mulusa3
+    ;; A survived in 31:30:27:26
+    rcall 1f
+    pop AA1
+    pop AA0
+    bst AA3, 7
+1:  brtc 9f
+    ;; 1-extend A/B
+    sub C2, AA0
+    sbc C3, AA1
+9:  ret
+ENDF __mulsa3
+#endif  /* L_mulsa3 */
+
+#if defined (L_mulusa3)
+;;; (R25:R22)  *=  (R21:R18)
+;;; Clobbers: ABI, called by optabs and __mulsa3
+;;; Rounding:  -1 LSB  <=  error  <=  1 LSB
+;;; Does not clobber T and A[] survives in 26, 27, 30, 31
+DEFUN __mulusa3
+    push CC2
+    push CC3
+    ; clear result
+    clr __tmp_reg__
+    wmov CC2, CC0
+    ; save multiplicand
+    wmov AA0, A0
+    wmov AA2, A2
+    rjmp 3f
+
+    ;; Loop the integral part
+
+1:  ;; CC += A * 2^n;  n >= 0
+    add CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
+
+2:  ;; A <<= 1
+    lsl A0      $  rol A1      $  rol A2      $  rol A3
+
+3:  ;; IBIT(B) >>= 1
+    ;; Carry = n-th bit of B;  n >= 0
+    lsr B3
+    ror B2
+    brcs 1b
+    sbci B3, 0
+    brne 2b
+
+    ;; Loop the fractional part
+    ;; B2/B3 is 0 now, use as guard bits for rounding
+    ;; Restore multiplicand
+    wmov A0, AA0
+    wmov A2, AA2
+    rjmp 5f
+
+4:  ;; CC += A:Guard * 2^n;  n < 0
+    add B3,B2  $  adc CC0,A0  $  adc CC1,A1  $  adc CC2,A2  $  adc CC3,A3
+5:
+    ;; A:Guard >>= 1
+    lsr A3  $  ror A2  $  ror A1  $  ror A0  $  ror B2
+
+    ;; FBIT(B) <<= 1
+    ;; Carry = n-th bit of B;  n < 0
+    lsl B0
+    rol B1
+    brcs 4b
+    sbci B0, 0
+    brne 5b
+
+    ;; Move result into place and round
+    lsl B3
+    wmov C2, CC2
+    wmov C0, CC0
+    clr __zero_reg__
+    adc C0, __zero_reg__
+    adc C1, __zero_reg__
+    adc C2, __zero_reg__
+    adc C3, __zero_reg__
+
+    ;; Epilogue
+    pop CC3
+    pop CC2
+    ret
+ENDF __mulusa3
+#endif  /* L_mulusa3 */
+
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef AA0
+#undef AA1
+#undef AA2
+#undef AA3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
+
+#endif /* __AVR_HAVE_MUL__ */
+
+/*******************************************************
+      Fractional Division 8 / 8
+*******************************************************/
+
+#define r_divd  r25     /* dividend */
+#define r_quo   r24     /* quotient */
+#define r_div   r22     /* divisor */
+
+#if defined (L_divqq3)
+DEFUN __divqq3
+    mov r0, r_divd
+    eor r0, r_div
+    sbrc r_div, 7
+    neg r_div
+    sbrc r_divd, 7
+    neg r_divd
+    cp r_divd, r_div
+    breq __divqq3_minus1        ; if equal return -1
+    XCALL __udivuqq3
+    lsr r_quo
+    sbrc r0, 7                  ; negate result if needed
+    neg r_quo
+    ret
+__divqq3_minus1:
+    ldi r_quo, 0x80
+    ret
+ENDF __divqq3
+#endif  /* defined (L_divqq3) */
+
+#if defined (L_udivuqq3)
+DEFUN __udivuqq3
+    clr r_quo                   ; clear quotient
+    inc __zero_reg__            ; init loop counter, used per shift
+__udivuqq3_loop:
+    lsl r_divd                  ; shift dividend
+    brcs 0f                     ; dividend overflow
+    cp r_divd,r_div             ; compare dividend & divisor
+    brcc 0f                     ; dividend >= divisor
+    rol r_quo                   ; shift quotient (with CARRY)
+    rjmp __udivuqq3_cont
+0:
+    sub r_divd,r_div            ; restore dividend
+    lsl r_quo                   ; shift quotient (without CARRY)
+__udivuqq3_cont:
+    lsl __zero_reg__            ; shift loop-counter bit
+    brne __udivuqq3_loop
+    com r_quo                   ; complement result
+                                ; because C flag was complemented in loop
+    ret
+ENDF __udivuqq3
+#endif  /* defined (L_udivuqq3) */
+
+#undef r_divd
+#undef r_quo
+#undef r_div
+
+
+/*******************************************************
+      Fractional Division 16 / 16
+*******************************************************/
+#define r_divdL 26      /* dividend Low */
+#define r_divdH 27      /* dividend High */
+#define r_quoL  24      /* quotient Low */
+#define r_quoH  25      /* quotient High */
+#define r_divL  22      /* divisor */
+#define r_divH  23      /* divisor */
+#define r_cnt   21
+
+#if defined (L_divhq3)
+DEFUN __divhq3
+    mov r0, r_divdH
+    eor r0, r_divH
+    sbrs r_divH, 7
+    rjmp 1f
+    NEG2 r_divL
+1:
+    sbrs r_divdH, 7
+    rjmp 2f
+    NEG2 r_divdL
+2:
+    cp r_divdL, r_divL
+    cpc r_divdH, r_divH
+    breq __divhq3_minus1        ; if equal return -1
+    XCALL __udivuhq3
+    lsr r_quoH
+    ror r_quoL
+    brpl 9f
+    ;; negate result if needed
+    NEG2 r_quoL
+9:
+    ret
+__divhq3_minus1:
+    ldi r_quoH, 0x80
+    clr r_quoL
+    ret
+ENDF __divhq3
+#endif  /* defined (L_divhq3) */
+
+#if defined (L_udivuhq3)
+DEFUN __udivuhq3
+    sub r_quoH,r_quoH           ; clear quotient and carry
+    ;; FALLTHRU
+ENDF __udivuhq3
+
+DEFUN __udivuha3_common
+    clr r_quoL                  ; clear quotient
+    ldi r_cnt,16                ; init loop counter
+__udivuhq3_loop:
+    rol r_divdL                 ; shift dividend (with CARRY)
+    rol r_divdH
+    brcs __udivuhq3_ep          ; dividend overflow
+    cp r_divdL,r_divL           ; compare dividend & divisor
+    cpc r_divdH,r_divH
+    brcc __udivuhq3_ep          ; dividend >= divisor
+    rol r_quoL                  ; shift quotient (with CARRY)
+    rjmp __udivuhq3_cont
+__udivuhq3_ep:
+    sub r_divdL,r_divL          ; restore dividend
+    sbc r_divdH,r_divH
+    lsl r_quoL                  ; shift quotient (without CARRY)
+__udivuhq3_cont:
+    rol r_quoH                  ; shift quotient
+    dec r_cnt                   ; decrement loop counter
+    brne __udivuhq3_loop
+    com r_quoL                  ; complement result
+    com r_quoH                  ; because C flag was complemented in loop
+    ret
+ENDF __udivuha3_common
+#endif  /* defined (L_udivuhq3) */
+
+/*******************************************************
+      Fixed Division 8.8 / 8.8
+*******************************************************/
+#if defined (L_divha3)
+DEFUN __divha3
+    mov r0, r_divdH
+    eor r0, r_divH
+    sbrs r_divH, 7
+    rjmp 1f
+    NEG2 r_divL
+1:
+    sbrs r_divdH, 7
+    rjmp 2f
+    NEG2 r_divdL
+2:
+    XCALL __udivuha3
+    sbrs r0, 7                  ; negate result if needed
+    ret
+    NEG2 r_quoL
+    ret
+ENDF __divha3
+#endif  /* defined (L_divha3) */
+
+#if defined (L_udivuha3)
+DEFUN __udivuha3
+    mov r_quoH, r_divdL
+    mov r_divdL, r_divdH
+    clr r_divdH
+    lsl r_quoH                  ; shift quotient into carry
+    XJMP __udivuha3_common      ; same as fractional after rearrange
+ENDF __udivuha3
+#endif  /* defined (L_udivuha3) */
+
+#undef r_divdL
+#undef r_divdH
+#undef r_quoL
+#undef r_quoH
+#undef r_divL
+#undef r_divH
+#undef r_cnt
+
+/*******************************************************
+      Fixed Division 16.16 / 16.16
+*******************************************************/
+
+#define r_arg1L  24    /* arg1 gets passed already in place */
+#define r_arg1H  25
+#define r_arg1HL 26
+#define r_arg1HH 27
+#define r_divdL  26    /* dividend Low */
+#define r_divdH  27
+#define r_divdHL 30
+#define r_divdHH 31    /* dividend High */
+#define r_quoL   22    /* quotient Low */
+#define r_quoH   23
+#define r_quoHL  24
+#define r_quoHH  25    /* quotient High */
+#define r_divL   18    /* divisor Low */
+#define r_divH   19
+#define r_divHL  20
+#define r_divHH  21    /* divisor High */
+#define r_cnt __zero_reg__  /* loop count (0 after the loop!) */
+
+#if defined (L_divsa3)
+DEFUN __divsa3
+    mov r0, r_arg1HH
+    eor r0, r_divHH
+    sbrs r_divHH, 7
+    rjmp 1f
+    NEG4 r_divL
+1:
+    sbrs r_arg1HH, 7
+    rjmp 2f
+    NEG4 r_arg1L
+2:
+    XCALL __udivusa3
+    sbrs r0, 7                  ; negate result if needed
+    ret
+    NEG4 r_quoL
+    ret
+ENDF __divsa3
+#endif  /* defined (L_divsa3) */
+
+#if defined (L_udivusa3)
+DEFUN __udivusa3
+    ldi r_divdHL, 32            ; init loop counter
+    mov r_cnt, r_divdHL
+    clr r_divdHL
+    clr r_divdHH
+    wmov r_quoL, r_divdHL
+    lsl r_quoHL                 ; shift quotient into carry
+    rol r_quoHH
+__udivusa3_loop:
+    rol r_divdL                 ; shift dividend (with CARRY)
+    rol r_divdH
+    rol r_divdHL
+    rol r_divdHH
+    brcs __udivusa3_ep          ; dividend overflow
+    cp r_divdL,r_divL           ; compare dividend & divisor
+    cpc r_divdH,r_divH
+    cpc r_divdHL,r_divHL
+    cpc r_divdHH,r_divHH
+    brcc __udivusa3_ep          ; dividend >= divisor
+    rol r_quoL                  ; shift quotient (with CARRY)
+    rjmp __udivusa3_cont
+__udivusa3_ep:
+    sub r_divdL,r_divL          ; restore dividend
+    sbc r_divdH,r_divH
+    sbc r_divdHL,r_divHL
+    sbc r_divdHH,r_divHH
+    lsl r_quoL                  ; shift quotient (without CARRY)
+__udivusa3_cont:
+    rol r_quoH                  ; shift quotient
+    rol r_quoHL
+    rol r_quoHH
+    dec r_cnt                   ; decrement loop counter
+    brne __udivusa3_loop
+    com r_quoL                  ; complement result
+    com r_quoH                  ; because C flag was complemented in loop
+    com r_quoHL
+    com r_quoHH
+    ret
+ENDF __udivusa3
+#endif  /* defined (L_udivusa3) */
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg1HL
+#undef r_arg1HH
+#undef r_divdL
+#undef r_divdH
+#undef r_divdHL
+#undef r_divdHH
+#undef r_quoL
+#undef r_quoH
+#undef r_quoHL
+#undef r_quoHH
+#undef r_divL
+#undef r_divH
+#undef r_divHL
+#undef r_divHH
+#undef r_cnt
diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 95a7d3d4eeb..6b9879ee7d7 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -91,6 +91,35 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 .endfunc
 .endm
 
+;; Negate a 2-byte value held in consecutive registers
+.macro NEG2 reg
+    com \reg+1
+    neg \reg
+    sbci \reg+1, -1
+.endm
+
+;; Negate a 4-byte value held in consecutive registers
+.macro NEG4 reg
+    com \reg+3
+    com \reg+2
+    com \reg+1
+.if \reg >= 16
+    neg \reg
+    sbci \reg+1, -1
+    sbci \reg+2, -1
+    sbci \reg+3, -1
+.else
+    com \reg
+    adc \reg,   __zero_reg__
+    adc \reg+1, __zero_reg__
+    adc \reg+2, __zero_reg__
+    adc \reg+3, __zero_reg__
+.endif
+.endm
+
+#define exp_lo(N)  hlo8 ((N) << 23)
+#define exp_hi(N)  hhi8 ((N) << 23)
+
 .section .text.libgcc.mul, "ax", @progbits
 
@@ -126,175 +155,246 @@ ENDF __mulqi3
 #endif /* defined (L_mulqi3) */
 
-#if defined (L_mulqihi3)
-DEFUN __mulqihi3
-	clr	r25
-	sbrc	r24, 7
-	dec	r25
-	clr	r23
-	sbrc	r22, 7
-	dec	r22
-	XJMP	__mulhi3
-ENDF __mulqihi3:
-#endif /* defined (L_mulqihi3) */
+
+/*******************************************************
+    Widening Multiplication 16 = 8 x 8 without MUL
+    Multiplication 16 x 16 without MUL
+*******************************************************/
+
+#define A0  r22
+#define A1  r23
+#define B0  r24
+#define BB0 r20
+#define B1  r25
+;; Output overlaps input, thus expand result in CC0/1
+#define C0  r24
+#define C1  r25
+#define CC0 __tmp_reg__
+#define CC1 R21
 
 #if defined (L_umulqihi3)
+;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
+;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
+;;; Clobbers: __tmp_reg__, R21..R23
 DEFUN __umulqihi3
-	clr	r25
-	clr	r23
-	XJMP	__mulhi3
+    clr A1
+    clr B1
+    XJMP __mulhi3
 ENDF __umulqihi3
-#endif /* defined (L_umulqihi3) */
+#endif /* L_umulqihi3 */
 
-/*******************************************************
-    Multiplication  16 x 16  without MUL
-*******************************************************/
-#if defined (L_mulhi3)
-#define	r_arg1L	r24		/* multiplier Low */
-#define	r_arg1H	r25		/* multiplier High */
-#define	r_arg2L	r22		/* multiplicand Low */
-#define	r_arg2H	r23		/* multiplicand High */
-#define r_resL	__tmp_reg__	/* result Low */
-#define r_resH  r21		/* result High */
+#if defined (L_mulqihi3)
+;;; R25:R24 = (signed int) R22 * (signed int) R24
+;;; (C1:C0) = (signed int) A0 * (signed int) B0
+;;; Clobbers: __tmp_reg__, R20..R23
+DEFUN __mulqihi3
+    ;; Sign-extend B0
+    clr B1
+    sbrc B0, 7
+    com B1
+    ;; The multiplication runs twice as fast if A1 is zero, thus:
+    ;; Zero-extend A0
+    clr A1
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; Store  B0 * sign of A
+    clr BB0
+    sbrc A0, 7
+    mov BB0, B0
+    call __mulhi3
+#else /* have no CALL */
+    ;; Skip sign-extension of A if A >= 0
+    ;; Same size as with the first alternative but avoids errata skip
+    ;; and is faster if A >= 0
+    sbrs A0, 7
+    rjmp __mulhi3
+    ;; If  A < 0  store B
+    mov BB0, B0
+    rcall __mulhi3
+#endif /* HAVE_JMP_CALL */
+    ;; 1-extend A after the multiplication
+    sub C1, BB0
+    ret
+ENDF __mulqihi3
+#endif /* L_mulqihi3 */
 
+#if defined (L_mulhi3)
+;;; R25:R24 = R23:R22 * R25:R24
+;;; (C1:C0) = (A1:A0) * (B1:B0)
+;;; Clobbers: __tmp_reg__, R21..R23
 DEFUN __mulhi3
-	clr	r_resH		; clear result
-	clr	r_resL		; clear result
-__mulhi3_loop:
-	sbrs	r_arg1L,0
-	rjmp	__mulhi3_skip1
-	add	r_resL,r_arg2L	; result + multiplicand
-	adc	r_resH,r_arg2H
-__mulhi3_skip1:
-	add	r_arg2L,r_arg2L	; shift multiplicand
-	adc	r_arg2H,r_arg2H
-
-	cp	r_arg2L,__zero_reg__
-	cpc	r_arg2H,__zero_reg__
-	breq	__mulhi3_exit	; while multiplicand != 0
-
-	lsr	r_arg1H		; gets LSB of multiplier
-	ror	r_arg1L
-	sbiw	r_arg1L,0
-	brne	__mulhi3_loop	; exit if multiplier = 0
-__mulhi3_exit:
-	mov	r_arg1H,r_resH	; result to return register
-	mov	r_arg1L,r_resL
-	ret
-ENDF __mulhi3
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg2L
-#undef r_arg2H
-#undef r_resL
-#undef r_resH
+    ;; Clear result
+    clr CC0
+    clr CC1
+    rjmp 3f
+1:
+    ;; Bit n of A is 1  -->  C += B << n
+    add CC0, B0
+    adc CC1, B1
+2:
+    lsl B0
+    rol B1
+3:
+    ;; If B == 0 we are ready
+    sbiw B0, 0
+    breq 9f
+
+    ;; Carry = n-th bit of A
+    lsr A1
+    ror A0
+    ;; If bit n of A is set, then go add  B * 2^n  to  C
+    brcs 1b
+
+    ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
+    ;; Thus, it is sufficient to CPC the high part to test A against 0
+    cpc A1, __zero_reg__
+    ;; Only proceed if A != 0
+    brne 2b
+9:
+    ;; Move Result into place
+    mov C0, CC0
+    mov C1, CC1
+    ret
+ENDF __mulhi3
+#endif /* L_mulhi3 */
 
-#endif /* defined (L_mulhi3) */
+#undef A0
+#undef A1
+#undef B0
+#undef BB0
+#undef B1
+#undef C0
+#undef C1
+#undef CC0
+#undef CC1
+
+
+#define A0 22
+#define A1 A0+1
+#define A2 A0+2
+#define A3 A0+3
+
+#define B0 18
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define CC0 26
+#define CC1 CC0+1
+#define CC2 30
+#define CC3 CC2+1
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
 
 /*******************************************************
     Widening Multiplication 32 = 16 x 16 without MUL
 *******************************************************/
 
-#if defined (L_mulhisi3)
-DEFUN __mulhisi3
-;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	sbrc	r23, 7
-	dec	r24
-	mov	r25, r24
-	clr	r20
-	sbrc	r19, 7
-	dec	r20
-	mov	r21, r20
-	XJMP	__mulsi3
-ENDF __mulhisi3
-#endif /* defined (L_mulhisi3) */
-
 #if defined (L_umulhisi3)
 DEFUN __umulhisi3
-;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	clr	r25
-	mov_l	r20, r24
-	mov_h	r21, r25
+    wmov B0, 24
+    ;; Zero-extend B
+    clr B2
+    clr B3
+    ;; Zero-extend A
+    wmov A2, B2
     XJMP __mulsi3
 ENDF __umulhisi3
-#endif /* defined (L_umulhisi3) */
+#endif /* L_umulhisi3 */
+
+#if defined (L_mulhisi3)
+DEFUN __mulhisi3
+    wmov B0, 24
+    ;; Sign-extend B
+    lsl r25
+    sbc B2, B2
+    mov B3, B2
+#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
+    ;; Sign-extend A
+    clr A2
+    sbrc A1, 7
+    com A2
+    mov A3, A2
+    XJMP __mulsi3
+#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
+    ;; Zero-extend A and __mulsi3 will run at least twice as fast
+    ;; compared to a sign-extended A.
+    clr A2
+    clr A3
+    sbrs A1, 7
+    XJMP __mulsi3
+    ;; If  A < 0  then perform the  B * 0xffff....  before the
+    ;; very multiplication by initializing the high part of the
+    ;; result CC with -B.
+    wmov CC2, A2
+    sub CC2, B0
+    sbc CC3, B1
+    XJMP __mulsi3_helper
+#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
 
-#if defined (L_mulsi3)
 /*******************************************************
     Multiplication 32 x 32 without MUL
 *******************************************************/
 
-#define r_arg1L  r22		/* multiplier Low */
-#define r_arg1H  r23
-#define r_arg1HL r24
-#define r_arg1HH r25		/* multiplier High */
-
-#define r_arg2L  r18		/* multiplicand Low */
-#define r_arg2H  r19
-#define r_arg2HL r20
-#define r_arg2HH r21		/* multiplicand High */
-
-#define r_resL	 r26		/* result Low */
-#define r_resH   r27
-#define r_resHL  r30
-#define r_resHH  r31		/* result High */
-
+#if defined (L_mulsi3)
 DEFUN __mulsi3
-	clr	r_resHH		; clear result
-	clr	r_resHL		; clear result
-	clr	r_resH		; clear result
-	clr	r_resL		; clear result
-__mulsi3_loop:
-	sbrs	r_arg1L,0
-	rjmp	__mulsi3_skip1
-	add	r_resL,r_arg2L	; result + multiplicand
-	adc	r_resH,r_arg2H
-	adc	r_resHL,r_arg2HL
-	adc	r_resHH,r_arg2HH
-__mulsi3_skip1:
-	add	r_arg2L,r_arg2L	; shift multiplicand
-	adc	r_arg2H,r_arg2H
-	adc	r_arg2HL,r_arg2HL
-	adc	r_arg2HH,r_arg2HH
-
-	lsr	r_arg1HH	; gets LSB of multiplier
-	ror	r_arg1HL
-	ror	r_arg1H
-	ror	r_arg1L
-	brne	__mulsi3_loop
-	sbiw	r_arg1HL,0
-	cpc	r_arg1H,r_arg1L
-	brne	__mulsi3_loop	; exit if multiplier = 0
-__mulsi3_exit:
-	mov_h	r_arg1HH,r_resHH	; result to return register
-	mov_l	r_arg1HL,r_resHL
-	mov_h	r_arg1H,r_resH
-	mov_l	r_arg1L,r_resL
-	ret
-ENDF __mulsi3
+    ;; Clear result
+    clr CC2
+    clr CC3
+    ;; FALLTHRU
+ENDF __mulsi3
 
-#undef r_arg1L
-#undef r_arg1H
-#undef r_arg1HL
-#undef r_arg1HH
-
-#undef r_arg2L
-#undef r_arg2H
-#undef r_arg2HL
-#undef r_arg2HH
-
-#undef r_resL
-#undef r_resH
-#undef r_resHL
-#undef r_resHH
+DEFUN __mulsi3_helper
+    clr CC0
+    clr CC1
+    rjmp 3f
+
+1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
+    ;; CC += B
+    add CC0,B0  $  adc CC1,B1  $  adc CC2,B2  $  adc CC3,B3
+
+2:  ;; B <<= 1
+    lsl B0      $  rol B1      $  rol B2      $  rol B3
+
+3:  ;; A >>= 1:  Carry = n-th bit of A
+    lsr A3  $  ror A2  $  ror A1  $  ror A0
+
+    brcs 1b
+    ;; Only continue if  A != 0
+    sbci A1, 0
+    brne 2b
+    sbiw A2, 0
+    brne 2b
+
+    ;; All bits of A are consumed:  Copy result to return register C
+    wmov C0, CC0
+    wmov C2, CC2
+    ret
+ENDF __mulsi3_helper
+#endif /* L_mulsi3 */
 
-#endif /* defined (L_mulsi3) */
+#undef A0
+#undef A1
+#undef A2
+#undef A3
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef CC0
+#undef CC1
+#undef CC2
+#undef CC3
 
 #endif /* !defined (__AVR_HAVE_MUL__) */
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -316,7 +416,7 @@ ENDF __mulsi3
 #define C3  C0+3
 
 /*******************************************************
-    Widening Multiplication 32 = 16 x 16
+    Widening Multiplication 32 = 16 x 16 with MUL
 *******************************************************/
 
 #if defined (L_mulhisi3)
@@ -364,7 +464,17 @@ DEFUN __umulhisi3
     mul A1, B1
     movw C2, r0
     mul A0, B1
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; This function is used by many other routines, often multiple times.
+    ;; Therefore, if the flash size is not too limited, avoid the RCALL
+    ;; and invest 6 bytes to speed things up.
+    add C1, r0
+    adc C2, r1
+    clr __zero_reg__
+    adc C3, __zero_reg__
+#else
     rcall 1f
+#endif
     mul A1, B0
 1:  add C1, r0
     adc C2, r1
@@ -375,7 +485,7 @@ ENDF __umulhisi3
 #endif /* L_umulhisi3 */
 
 /*******************************************************
-    Widening Multiplication 32 = 16 x 32
+    Widening Multiplication 32 = 16 x 32 with MUL
 *******************************************************/
 
 #if defined (L_mulshisi3)
@@ -425,7 +535,7 @@ ENDF __muluhisi3
 #endif /* L_muluhisi3 */
 
 /*******************************************************
-    Multiplication 32 x 32
+    Multiplication 32 x 32 with MUL
 *******************************************************/
 
 #if defined (L_mulsi3)
@@ -468,7 +578,7 @@ ENDF __mulsi3
 #endif /* __AVR_HAVE_MUL__ */
 
 /*******************************************************
-    Multiplication 24 x 24
+    Multiplication 24 x 24 with MUL
 *******************************************************/
 
 #if defined (L_mulpsi3)
@@ -1247,6 +1357,19 @@ __divmodsi4_exit:
 ENDF __divmodsi4
 #endif /* defined (L_divmodsi4) */
 
+#undef r_remHH
+#undef r_remHL
+#undef r_remH
+#undef r_remL
+#undef r_arg1HH
+#undef r_arg1HL
+#undef r_arg1H
+#undef r_arg1L
+#undef r_arg2HH
+#undef r_arg2HL
+#undef r_arg2H
+#undef r_arg2L
+#undef r_cnt
 
 /*******************************************************
        Division 64 / 64
@@ -2757,9 +2880,7 @@ DEFUN __fmulsu_exit
     XJMP  __fmul
 1:  XCALL __fmul
     ;; C = -C iff A0.7 = 1
-    com C1
-    neg C0
-    sbci C1, -1
+    NEG2 C0
     ret
 ENDF __fmulsu_exit
 #endif /* L_fmulsu */
@@ -2794,3 +2915,5 @@ ENDF __fmul
 #undef B1
 #undef C0
 #undef C1
+
+#include "lib1funcs-fixed.S"
diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr
index 43caa94ca2a..6f783cd9d52 100644
--- a/libgcc/config/avr/t-avr
+++ b/libgcc/config/avr/t-avr
@@ -2,6 +2,7 @@ LIB1ASMSRC = avr/lib1funcs.S
 LIB1ASMFUNCS = \
 	_mulqi3 \
 	_mulhi3 \
+	_mulqihi3 _umulqihi3 \
 	_mulpsi3 _mulsqipsi3 \
 	_mulhisi3 \
 	_umulhisi3 \
@@ -55,6 +56,24 @@ LIB1ASMFUNCS = \
 	_cmpdi2 _cmpdi2_s8 \
 	_fmul _fmuls _fmulsu
 
+# Fixed point routines in avr/lib1funcs-fixed.S
+LIB1ASMFUNCS += \
+	_fractqqsf _fractuqqsf \
+	_fracthqsf _fractuhqsf _fracthasf _fractuhasf \
+	_fractsasf _fractusasf _fractsqsf _fractusqsf \
+	\
+	_fractsfqq _fractsfuqq \
+	_fractsfhq _fractsfuhq _fractsfha _fractsfuha \
+	_fractsfsa _fractsfusa \
+	_mulqq3 \
+	_mulhq3 _muluhq3 \
+	_mulha3 _muluha3 _muluha3_round \
+	_mulsa3 _mulusa3 \
+	_divqq3 _udivuqq3 \
+	_divhq3 _udivuhq3 \
+	_divha3 _udivuha3 \
+	_divsa3 _udivusa3
+
 LIB2FUNCS_EXCLUDE = \
 	_moddi3 _umoddi3 \
 	_clz
@@ -81,3 +100,49 @@ libgcc-objects += $(patsubst %,%$(objext),$(hiintfuncs16))
 ifeq ($(enable_shared),yes)
 libgcc-s-objects += $(patsubst %,%_s$(objext),$(hiintfuncs16))
 endif
+
+
+# Filter out supported conversions from fixed-bit.c
+
+conv_XY=$(conv)$(mode1)$(mode2)
+conv_X=$(conv)$(mode)
+
+# Conversions supported by the compiler
+
+convf_modes =	 QI UQI QQ UQQ \
+		 HI UHI HQ UHQ HA UHA \
+		 SI USI SQ USQ SA USA \
+		 DI UDI DQ UDQ DA UDA \
+		 TI UTI TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_fract _fractuns,\
+		$(foreach mode1,$(convf_modes),\
+			$(foreach mode2,$(convf_modes),$(conv_XY))))
+
+# Conversions supported by lib1funcs-fixed.S
+
+conv_to_sf_modes   = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA
+conv_from_sf_modes = QQ UQQ HQ UHQ HA UHA        SA USA
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_fract, \
+		$(foreach mode1,$(conv_to_sf_modes), \
+			$(foreach mode2,SF,$(conv_XY))))
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_fract,\
+		$(foreach mode1,SF,\
+			$(foreach mode2,$(conv_from_sf_modes),$(conv_XY))))
+
+# Arithmetic supported by the compiler
+
+allfix_modes = QQ UQQ HQ UHQ HA UHA SQ USQ SA USA DA UDA DQ UDQ TQ UTQ TA UTA
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_add _sub,\
+		$(foreach mode,$(allfix_modes),$(conv_X)3))
+
+LIB2FUNCS_EXCLUDE += \
+	$(foreach conv,_lshr _ashl _ashr _cmp,\
+		$(foreach mode,$(allfix_modes),$(conv_X)))
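[Editorial illustration of the Make machinery above, not part of the patch: conv_XY simply concatenates $(conv)$(mode1)$(mode2), so conv=_fract, mode1=QQ, mode2=SF expands to _fractQQSF; every name produced this way is dropped from the generic fixed-bit.c build because either the compiler or lib1funcs-fixed.S now provides the operation. A hedged C sketch of a conversion that would consequently bind to the hand-written assembly rather than the excluded generic code, assuming _Fract maps to HQ mode:]

    /* Sketch only; assumes _Fract is HQ mode, so the cast below binds to
       the hand-written __fracthqsf instead of generic fixed-bit.c code. */
    float fract_to_float(_Fract x)
    {
        return (float) x;
    }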