Diffstat (limited to 'libgcc/config/arc/ieee-754/arc600-mul64')
-rw-r--r--  libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S   410
-rw-r--r--  libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S   274
-rw-r--r--  libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S   234
-rw-r--r--  libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S   180
4 files changed, 1098 insertions, 0 deletions
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S
new file mode 100644
index 00000000000..0584b53f77b
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S
@@ -0,0 +1,410 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+ on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ To calculate a := b/x as b*y, with y := 1/x:
+ - x is in the range [1..2)
+ - calculate a 15..18 bit inverse y0 using a table of approximating
+ polynomials. Precision is higher for the polynomials used to evaluate
+ input with larger value.
+ - Do one Newton-Raphson iteration step to double the precision,
+ then multiply this with the divisor
+ -> more time to decide if the dividend is subnormal
+ - the worst error propagation is on the side of the value range
+ with the least initial defect, thus giving us about 30 bits precision.
+ The truncation error for either is less than 1 + x/2 ulp.
+ A 31 bit inverse can be simply calculated by using x with implicit 1
+ and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
+ with the bare fraction part of x, then add in y0^2 for the implicit
+ 1 of x.
+ - If calculating a 31 bit inverse, the systematic error is less than
+ -1 ulp; likewise, for 32 bit, it is less than -2 ulp.
+ - If we calculate our seed with a 32 bit fraction, we can achieve a
+ tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
+ only need to take the step to calculate the 2nd stage rest and
+ rounding adjust 1/32nd of the time. However, if we use a 20 bit
+ fraction for the seed, the negative error can exceed -2 ulp/128, (2)
+ thus for a simple add / tst check, we need to do the 2nd stage
+ rest calculation / rounding adjust 1/16th of the time.
+ (1): The inexactness of the 32 bit inverse contributes an error in the
+ range of (-1 .. +(1+x/2)) ulp/128. Leaving out the low word of the
+ rest contributes an error < +1/x ulp/128. In the interval [1,2),
+ x/2 + 1/x <= 1.5.
+ (2): Unless proven otherwise. I have not actually looked for an
+ example where -2 ulp/128 is exceeded, and my calculations indicate
+ that the excess, if it exists, is less than -1/512 ulp.
+ ??? The algorithm is still based on the ARC700 optimized code.
+ Maybe we could make better use of 64 bit multiply results and/or mmed.
+ */
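
For orientation, here is a minimal C sketch of the seed-plus-one-Newton-step idea the comment above describes. The seed value, test divisor, and function name are illustrative only; the assembly works in fixed point, takes its seed from .Ldivtab, and merges the step with the back-multiplication by the dividend.

    #include <stdio.h>

    /* One Newton-Raphson step for the reciprocal: with f(y) = 1/y - x,
       the update is y1 = y0 * (2 - x*y0), which roughly doubles the
       number of correct bits of y0 ~ 1/x.  (Sketch only, not libgcc code.)  */
    static double
    refine_recip (double x, double y0)
    {
      return y0 * (2.0 - x * y0);
    }

    int
    main (void)
    {
      double x  = 1.375;  /* divisor fraction, in [1,2) */
      double y0 = 0.72;   /* crude seed; __divdf3 looks this up in .Ldivtab */
      double y1 = refine_recip (x, y0);
      printf ("1/x = %.12f  seed = %.12f  after one step = %.12f\n",
              1.0 / x, y0, y1);
      return 0;
    }

Each step roughly squares the relative error, which is why a 15..18 bit table seed is enough to reach about 30 bits after a single iteration.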
+#include "../arc-ieee-754.h"
+
+/* N.B. fp-bit.c does double rounding on denormal numbers. */
+#if 0 /* DEBUG */
+ .global __divdf3
+ FUNC(__divdf3)
+ .balign 4
+__divdf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __divdf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __divdf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ and r12,DBL0H,DBL1H
+ bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__divdf3)
+#define __divdf3 __divdf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divdf3)
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000
+.Ldivtab:
+ .long 0xfc0fffe1
+ .long 0xf46ffdfb
+ .long 0xed1ffa54
+ .long 0xe61ff515
+ .long 0xdf7fee75
+ .long 0xd91fe680
+ .long 0xd2ffdd52
+ .long 0xcd1fd30c
+ .long 0xc77fc7cd
+ .long 0xc21fbbb6
+ .long 0xbcefaec0
+ .long 0xb7efa100
+ .long 0xb32f92bf
+ .long 0xae8f83b7
+ .long 0xaa2f7467
+ .long 0xa5ef6479
+ .long 0xa1cf53fa
+ .long 0x9ddf433e
+ .long 0x9a0f3216
+ .long 0x965f2091
+ .long 0x92df0f11
+ .long 0x8f6efd05
+ .long 0x8c1eeacc
+ .long 0x88eed876
+ .long 0x85dec615
+ .long 0x82eeb3b9
+ .long 0x800ea10b
+ .long 0x7d3e8e0f
+ .long 0x7a8e7b3f
+ .long 0x77ee6836
+ .long 0x756e5576
+ .long 0x72fe4293
+ .long 0x709e2f93
+ .long 0x6e4e1c7f
+ .long 0x6c0e095e
+ .long 0x69edf6c5
+ .long 0x67cde3a5
+ .long 0x65cdd125
+ .long 0x63cdbe25
+ .long 0x61ddab3f
+ .long 0x600d991f
+ .long 0x5e3d868c
+ .long 0x5c6d7384
+ .long 0x5abd615f
+ .long 0x590d4ecd
+ .long 0x576d3c83
+ .long 0x55dd2a89
+ .long 0x545d18e9
+ .long 0x52dd06e9
+ .long 0x516cf54e
+ .long 0x4ffce356
+ .long 0x4e9cd1ce
+ .long 0x4d3cbfec
+ .long 0x4becae86
+ .long 0x4aac9da4
+ .long 0x496c8c73
+ .long 0x483c7bd3
+ .long 0x470c6ae8
+ .long 0x45dc59af
+ .long 0x44bc4915
+ .long 0x43ac3924
+ .long 0x428c27fb
+ .long 0x418c187a
+ .long 0x407c07bd
+
+__divdf3_support: /* This label makes debugger output saner. */
+ .balign 4
+.Ldenorm_dbl1:
+ brge r6, \
+ 0x43500000,.Linf_NaN ; large number / denorm -> Inf
+ bmsk.f r12,DBL1H,19
+ mov.eq r12,DBL1L
+ mov.eq DBL1L,0
+ sub.eq r7,r7,32
+ norm.f r11,r12 ; flag for x/0 -> Inf check
+ beq_s .Linf_NaN
+ mov.mi r11,0
+ add.pl r11,r11,1
+ add_s r12,r12,r12
+ asl r8,r12,r11
+ rsub r12,r11,31
+ lsr r12,DBL1L,r12
+ tst_s DBL1H,DBL1H
+ or r8,r8,r12
+ lsr r4,r8,26
+ lsr DBL1H,r8,12
+ ld.as r4,[r10,r4]
+ bxor.mi DBL1H,DBL1H,31
+ sub r11,r11,11
+ asl DBL1L,DBL1L,r11
+ sub r11,r11,1
+ mulu64 r4,r8
+ sub r7,r7,r11
+ b.d .Lpast_denorm_dbl1
+ asl r7,r7,20
+
+ .balign 4
+.Ldenorm_dbl0:
+ bmsk.f r12,DBL0H,19
+ ; wb stall
+ mov.eq r12,DBL0L
+ sub.eq r6,r6,32
+ norm.f r11,r12 ; flag for 0/x -> 0 check
+ brge r7, \
+ 0x43500000, .Lret0_2 ; denorm/large number -> 0
+ beq_s .Lret0_2
+ mov.mi r11,0
+ add.pl r11,r11,1
+ asl r12,r12,r11
+ sub r6,r6,r11
+ add.f 0,r6,31
+ lsr r10,DBL0L,r6
+ mov.mi r10,0
+ add r6,r6,11+32
+ neg.f r11,r6
+ asl DBL0L,DBL0L,r11
+ mov.pl DBL0L,0
+ sub r6,r6,32-1
+ b.d .Lpast_denorm_dbl0
+ asl r6,r6,20
+
+.Linf_NaN:
+ tst_s DBL0L,DBL0L ; 0/0 -> NaN
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr.eq.f DBL0H,DBL0H,31
+ bmsk DBL0H,DBL1H,30
+ xor_s DBL0H,DBL0H,DBL1H
+ sub.eq DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ j_s.d [blink]
+ or DBL0H,DBL0H,r9
+ .balign 4
+.Lret0_2:
+ xor_s DBL1H,DBL1H,DBL0H
+ mov_s DBL0L,0
+ bmsk DBL0H,DBL1H,30
+ j_s.d [blink]
+ xor_s DBL0H,DBL0H,DBL1H
+ .balign 4
+ .global __divdf3
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
+__divdf3:
+ asl r8,DBL1H,12
+ lsr r4,r8,26
+ sub3 r10,pcl,61; (.-.Ldivtab) >> 3
+ ld.as r9,[pcl,-124]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+ ld.as r4,[r10,r4]
+ lsr r12,DBL1L,20
+ and.f r7,DBL1H,r9
+ or r8,r8,r12
+ mulu64 r4,r8
+ beq.d .Ldenorm_dbl1
+.Lpast_denorm_dbl1:
+ and.f r6,DBL0H,r9
+ breq.d r7,r9,.Linf_nan_dbl1
+ asl r4,r4,12
+ sub r4,r4,mhi
+ mulu64 r4,r4
+ beq.d .Ldenorm_dbl0
+ lsr r8,r8,1
+ breq.d r6,r9,.Linf_nan_dbl0
+ asl r12,DBL0H,11
+ lsr r10,DBL0L,21
+.Lpast_denorm_dbl0:
+ bset r8,r8,31
+ mulu64 mhi,r8
+ add_s r12,r12,r10
+ bset r5,r12,31
+ cmp r5,r8
+ cmp.eq DBL0L,DBL1L
+ lsr.cc r5,r5,1
+ sub r4,r4,mhi ; u1.31 inverse, about 30 bit
+ mulu64 r5,r4 ; result fraction highpart
+ lsr r8,r8,2 ; u3.29
+ add r5,r6, /* wait for immediate */ \
+ 0x3fe00000
+ mov r11,mhi ; result fraction highpart
+ mulu64 r11,r8 ; u-28.31
+ asl_s DBL1L,DBL1L,9 ; u-29.23:9
+ sbc r6,r5,r7
+ mov r12,mlo ; u-28.31
+ mulu64 r11,DBL1L ; mhi: u-28.23:9
+ add.cs DBL0L,DBL0L,DBL0L
+ asl_s DBL0L,DBL0L,6 ; u-26.25:7
+ asl r10,r11,23
+ sub_l DBL0L,DBL0L,r12
+ lsr r7,r11,9
+ sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
+ mul64 r5,r4 ; mhi: result fraction lowpart
+ xor.f 0,DBL0H,DBL1H
+ and DBL0H,r6,r9
+ add_s DBL0H,DBL0H,r7
+ bclr r12,r9,20 ; 0x7fe00000
+ brhs.d r6,r12,.Linf_denorm
+ bxor.mi DBL0H,DBL0H,31
+ add.f r12,mhi,0x11
+ asr r9,r12,5
+ sub.mi DBL0H,DBL0H,1
+ add.f DBL0L,r9,r10
+ tst r12,0x1c
+ jne.d [blink]
+ add.cs DBL0H,DBL0H,1
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in double
+ precision. Find the mid-point between the two nearest
+ representable values, multiply it by the divisor, and check if
+ the result is larger than the dividend. Since we want to know
+ only the sign bit, it is sufficient to calculate only the
+ highpart of the lower 64 bits. (A C sketch of this check follows
+ the code path below.) */
+ mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
+ sub.f DBL0L,DBL0L,1
+ asl r12,r9,2 ; u-22.30:2
+ sub.cs DBL0H,DBL0H,1
+ sub.f r12,r12,2
+ mov r10,mlo ; rest before considering r12 in r5 : -r10
+ mulu64 r12,DBL1L ; mhi: u-51.32
+ asl r5,r5,25 ; s-51.7:25
+ lsr r10,r10,7 ; u-51.30:2
+ mov r7,mhi ; u-51.32
+ mulu64 r12,r8 ; mlo: u-51.31:1
+ sub r5,r5,r10
+ add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
+ bset r7,r7,0 ; make sure that the result is not zero, and that
+ sub r5,r5,r7 ; a highpart zero appears negative
+ sub.f r5,r5,mlo ; rest msw
+ add.pl.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.eq DBL0H,DBL0H,1
+
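
The mid-point test mentioned in the comment above, shown as a hedged C sketch on plain integers (the helper name is made up, and ties are rounded away from zero here just to keep it short, whereas the assembly resolves the tie via the sticky/even logic on the 64-bit rest words):

    #include <stdint.h>
    #include <stdio.h>

    /* Decide rounding of b/x without computing the exact quotient: take the
       truncated quotient q, form the mid-point q + 1/2, multiply it back by
       the divisor and compare against the dividend -- the same trick the
       fall-through path above applies to the double-precision rest.  */
    static uint64_t
    div_nearest (uint64_t b, uint64_t x)
    {
      uint64_t q = b / x;
      /* mid-point * x > b  <=>  (2*q + 1) * x > 2*b  */
      if ((2 * q + 1) * x > 2 * b)
        return q;       /* below the mid-point: keep the truncated quotient */
      return q + 1;     /* at or above the mid-point: round up */
    }

    int
    main (void)
    {
      printf ("%llu %llu\n",
              (unsigned long long) div_nearest (7, 2),   /* 3.5  -> 4 */
              (unsigned long long) div_nearest (22, 7)); /* 3.14 -> 3 */
      return 0;
    }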
+.Linf_nan_dbl1: ; 0/Inf -> NaN, Inf/Inf -> NaN, x/Inf -> 0, x/NaN -> NaN
+ or.f 0,r6,DBL0L
+ cmp.ne r6,r9
+ not_s DBL0L,DBL1H
+ sub_s.ne DBL0L,DBL0L,DBL0L
+ tst_s DBL0H,DBL0H
+ add_s DBL0H,DBL1H,DBL0L
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+.Linf_nan_dbl0:
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+ .balign 4
+.Linf_denorm:
+ lsr r12,r6,28
+ brlo.d r12,0xc,.Linf
+.Ldenorm:
+ asr r6,r6,20
+ neg r9,r6
+ mov_s DBL0H,0
+ brhs.d r9,54,.Lret0
+ bxor.mi DBL0H,DBL0H,31
+ add r12,mhi,1
+ and r12,r12,-4
+ rsub r7,r6,5
+ asr r10,r12,28
+ bmsk r4,r12,27
+ min r7,r7,31
+ asr DBL0L,r4,r7
+ add DBL1H,r11,r10
+ abs.f r10,r4
+ sub.mi r10,r10,1
+ add.f r7,r6,32-5
+ asl r4,r4,r7
+ mov.mi r4,r10
+ add.f r10,r6,23
+ rsub r7,r6,9
+ lsr r7,DBL1H,r7
+ asl r10,DBL1H,r10
+ or.pnz DBL0H,DBL0H,r7
+ or.mi r4,r4,r10
+ mov.mi r10,r7
+ add.f DBL0L,r10,DBL0L
+ add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
+ bxor.f 0,r4,31
+ add.pnz.f DBL0L,DBL0L,1
+ add.cs.f DBL0H,DBL0H,1
+ jne_s [blink]
+ /* Calculation so far was not conclusive; calculate further rest. */
+ mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
+ asr.f r12,r12,3
+ asl r5,r5,25 ; s-51.7:25
+ mov r11,mlo ; rest before considering r12 in r5 : -r11
+ mulu64 r12,r8 ; u-51.31:1
+ and r9,DBL0L,1 ; tie-breaker: round to even
+ lsr r11,r11,7 ; u-51.30:2
+ mov DBL1H,mlo ; u-51.31:1
+ mulu64 r12,DBL1L ; u-51.62:2
+ sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
+ add_s DBL1H,DBL1H,r11
+ sub DBL1H,DBL1H,r5 ; -rest msw
+ add_s DBL1H,DBL1H,mhi ; -rest msw
+ add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
+ tst_s DBL1H,DBL1H
+ cmp.eq mlo,r9
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+
+.Lret0:
+ /* return +- 0 */
+ j_s.d [blink]
+ mov_s DBL0L,0
+.Linf:
+ mov_s DBL0H,r9
+ mov_s DBL0L,0
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+ ENDFUNC(__divdf3)
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
new file mode 100644
index 00000000000..a9934f0ad94
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
@@ -0,0 +1,274 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+ on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ - calculate a 15..18 bit inverse using a table of approximating polynomials.
+ Precision is higher for the polynomials used to evaluate input with larger
+ value.
+ - Do one Newton-Raphson iteration step to double the precision,
+ then multiply this with the divisor
+ -> more time to decide if the dividend is subnormal
+ - the worst error propagation is on the side of the value range
+ with the least initial defect, thus giving us about 30 bits precision.
+ */
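
As a small illustration of how the seed is selected below (lsr r2,r1,17 ; bmsk_s r2,r2,5): the top six fraction bits of the divisor index the 64-entry .Ldivtab. The helper name and test value in this C sketch are invented:

    #include <stdint.h>
    #include <stdio.h>

    /* Extract the 6-bit table index from a single-precision bit pattern:
       fraction bits f22..f17, i.e. bits 22..17 of the word.  */
    static unsigned
    divtab_index (uint32_t divisor_bits)
    {
      return (divisor_bits >> 17) & 0x3f;
    }

    int
    main (void)
    {
      union { float f; uint32_t u; } x = { .f = 1.75f };
      /* 1.75f = 0x3FE00000: top fraction bits 110000 -> index 48 of 0..63.  */
      printf ("index for %g -> %u\n", (double) x.f, divtab_index (x.u));
      return 0;
    }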
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __divsf3
+ FUNC(__divsf3)
+ .balign 4
+__divsf3:
+ push_s blink
+ push_s r1
+ bl.d __divsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __divsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+#if 1
+ bne abort
+ jeq_s [blink]
+ b abort
+#else
+ bne abort
+ j_s [blink]
+#endif
+ ENDFUNC(__divsf3)
+#define __divsf3 __divsf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divsf3)
+ .balign 4
+.Ldivtab:
+ .long 0xfc0ffff0
+ .long 0xf46ffefd
+ .long 0xed1ffd2a
+ .long 0xe627fa8e
+ .long 0xdf7ff73b
+ .long 0xd917f33b
+ .long 0xd2f7eea3
+ .long 0xcd1fe986
+ .long 0xc77fe3e7
+ .long 0xc21fdddb
+ .long 0xbcefd760
+ .long 0xb7f7d08c
+ .long 0xb32fc960
+ .long 0xae97c1ea
+ .long 0xaa27ba26
+ .long 0xa5e7b22e
+ .long 0xa1cfa9fe
+ .long 0x9ddfa1a0
+ .long 0x9a0f990c
+ .long 0x9667905d
+ .long 0x92df878a
+ .long 0x8f6f7e84
+ .long 0x8c27757e
+ .long 0x88f76c54
+ .long 0x85df630c
+ .long 0x82e759c5
+ .long 0x8007506d
+ .long 0x7d3f470a
+ .long 0x7a8f3da2
+ .long 0x77ef341e
+ .long 0x756f2abe
+ .long 0x72f7212d
+ .long 0x709717ad
+ .long 0x6e4f0e44
+ .long 0x6c1704d6
+ .long 0x69e6fb44
+ .long 0x67cef1d7
+ .long 0x65c6e872
+ .long 0x63cedf18
+ .long 0x61e6d5cd
+ .long 0x6006cc6d
+ .long 0x5e36c323
+ .long 0x5c76b9f3
+ .long 0x5abeb0b7
+ .long 0x5916a79b
+ .long 0x57769e77
+ .long 0x55de954d
+ .long 0x54568c4e
+ .long 0x52d6834d
+ .long 0x51667a7f
+ .long 0x4ffe71b5
+ .long 0x4e9e68f1
+ .long 0x4d466035
+ .long 0x4bf65784
+ .long 0x4aae4ede
+ .long 0x496e4646
+ .long 0x48363dbd
+ .long 0x47063547
+ .long 0x45de2ce5
+ .long 0x44be2498
+ .long 0x43a61c64
+ .long 0x4296144a
+ .long 0x41860c0e
+ .long 0x407e03ee
+.L7f800000:
+ .long 0x7f800000
+ .balign 4
+ .global __divsf3_support
+__divsf3_support:
+.Linf_NaN:
+ bclr.f 0,r0,31 ; 0/0 -> NaN
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ bic_s r0,r0,r1
+ sub.eq r0,r0,1
+ j_s.d [blink]
+ or r0,r0,r9
+.Lret0:
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
+__divsf3:
+ lsr r2,r1,17
+ sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
+ bmsk_s r2,r2,5
+ ld.as r5,[r3,r2]
+ asl r4,r1,9
+ ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
+ mulu64 r5,r4
+ and.f r11,r1,r9
+ asl r6,r1,8
+ bset r6,r6,31
+ beq.d .Ldenorm_fp1
+ asl r5,r5,13
+ breq.d r11,r9,.Linf_nan_fp1
+ and.f r2,r0,r9
+ sub r7,r5,mhi
+ mulu64 r7,r6
+ beq.d .Ldenorm_fp0
+ asl r12,r0,8
+ breq.d r2,r9,.Linf_nan_fp0
+ mulu64 mhi,r7
+.Lpast_denorm_fp1:
+ bset r3,r12,31
+.Lpast_denorm_fp0:
+ cmp_s r3,r6
+ lsr.cc r3,r3,1
+ add_s r2,r2, /* wait for immediate */ \
+ 0x3f000000
+ sub r7,r7,mhi ; u1.31 inverse, about 30 bit
+ mulu64 r3,r7
+ sbc r2,r2,r11
+ xor.f 0,r0,r1
+ and r0,r2,r9
+ bclr r3,r9,23 ; 0x7f000000
+ brhs.d r2,r3,.Linf_denorm
+ bxor.mi r0,r0,31
+.Lpast_denorm:
+ add r3,mhi,0x22 ; round to nearest or higher
+ tst r3,0x3c ; check if rounding was unsafe
+ lsr r3,r3,6
+ jne.d [blink] ; return if rounding was safe.
+ add_s r0,r0,r3
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in single
+ precision. Find the mid-point between the two nearest
+ representable values, multiply it by the divisor, and check if
+ the result is larger than the dividend. */
+ add_s r3,r3,r3
+ sub_s r3,r3,1
+ mulu64 r3,r6
+ asr.f 0,r0,1 ; for round-to-even in case this is a denorm
+ rsub r2,r9,25
+ asl_s r12,r12,r2
+ sub.f 0,r12,mlo
+ j_s.d [blink]
+ sub.mi r0,r0,1
+.Linf_nan_fp1:
+ lsr_s r0,r0,31
+ bmsk.f 0,r1,22
+ asl_s r0,r0,31
+ bne_s 0f ; inf/inf -> nan
+ brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
+0: j_s.d [blink]
+ mov r0,-1
+.Lsigned0:
+.Linf_nan_fp0:
+ tst_s r1,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+ .balign 4
+ .global __divsf3
+/* For denormal results, it is possible that an exact result needs
+ rounding, and thus the round-to-even rule has to come into play. */
+.Linf_denorm:
+ brlo r2,0xc0000000,.Linf
+.Ldenorm:
+ asr_s r2,r2,23
+ bic r0,r0,r9
+ neg r9,r2
+ brlo.d r9,25,.Lpast_denorm
+ lsr r3,mlo,r9
+ /* Fall through: return +- 0 */
+ j_s [blink]
+.Linf:
+ j_s.d [blink]
+ or r0,r0,r9
+ .balign 4
+.Ldenorm_fp1:
+ bclr r6,r6,31
+ norm.f r12,r6 ; flag for x/0 -> Inf check
+ add r6,r6,r6
+ rsub r5,r12,16
+ ror r5,r1,r5
+ asl r6,r6,r12
+ bmsk r5,r5,5
+ ld.as r5,[r3,r5]
+ add r4,r6,r6
+ ; load latency
+ mulu64 r5,r4
+ bic.ne.f 0, \
+ 0x60000000,r0 ; large number / denorm -> Inf
+ asl r5,r5,13
+ sub r7,r5,mhi
+ beq.d .Linf_NaN
+ mulu64 r7,r6
+ asl_s r12,r12,23
+ and.f r2,r0,r9
+ add_s r2,r2,r12
+ asl r12,r0,8
+ bne.d .Lpast_denorm_fp1
+.Ldenorm_fp0: mulu64 mhi,r7
+ bclr r12,r12,31
+ norm.f r3,r12 ; flag for 0/x -> 0 check
+ bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
+ beq_s .Lret0
+ asl_s r12,r12,r3
+ asl_s r3,r3,23
+ add_s r12,r12,r12
+ add r11,r11,r3
+ b.d .Lpast_denorm_fp0
+ mov_s r3,r12
+ ENDFUNC(__divsf3)
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S
new file mode 100644
index 00000000000..1e18999e49a
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S
@@ -0,0 +1,234 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+ on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __muldf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __muldf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ and r12,DBL0H,DBL1H
+ bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+ jeq_s [blink]
+ b abort
+#define __muldf3 __muldf3_asm
+#endif /* DEBUG */
+
+__muldf3_support: /* This label makes debugger output saner. */
+ .balign 4
+ FUNC(__muldf3)
+.Ldenorm_2:
+ breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
+ norm.f r12,DBL1L
+ mov.mi r12,21
+ add.pl r12,r12,22
+ neg r11,r12
+ asl_s r12,r12,20
+ lsr.f DBL1H,DBL1L,r11
+ ror DBL1L,DBL1L,r11
+ sub_s DBL0H,DBL0H,r12
+ mov.eq DBL1H,DBL1L
+ sub_l DBL1L,DBL1L,DBL1H
+ /* Fall through. */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ mulu64 DBL0L,DBL1L
+ ld.as r9,[pcl,0x68] ; [pcl,((.L7ff00000-.+2)/4)]
+ bmsk r6,DBL0H,19
+ bset r6,r6,20
+ and r11,DBL0H,r9
+ breq.d r11,0,.Ldenorm_dbl0
+ and r12,DBL1H,r9
+ breq.d r12,0,.Ldenorm_dbl1
+ mov r8,mlo
+ mov r4,mhi
+ mulu64 r6,DBL1L
+ breq.d r11,r9,.Linf_nan
+ bmsk r10,DBL1H,19
+ breq.d r12,r9,.Linf_nan
+ bset r10,r10,20
+ add.f r4,r4,mlo
+ adc r5,mhi,0
+ mulu64 r10,DBL0L
+ add_s r12,r12,r11 ; add exponents
+ add.f r4,r4,mlo
+ adc r5,r5,mhi
+ mulu64 r6,r10
+ tst r8,r8
+ bclr r8,r9,30 ; 0x3ff00000
+ bset.ne r4,r4,0 ; put least significant word into sticky bit
+ bclr r6,r9,20 ; 0x7fe00000
+ add.f r5,r5,mlo
+ adc r7,mhi,0 ; fraction product in r7:r5:r4
+ lsr.f r10,r7,9
+ rsub.eq r8,r8,r9 ; 0x40000000
+ sub r12,r12,r8 ; subtract bias + implicit 1
+ brhs.d r12,r6,.Linf_denorm
+ rsub r10,r10,12
+.Lshift_frac:
+ neg r8,r10
+ asl r6,r4,r10
+ lsr DBL0L,r4,r8
+ add.f 0,r6,r6
+ btst.eq DBL0L,0
+ cmp.eq r4,r4 ; round to nearest / round to even
+ asl r4,r5,r10
+ lsr r5,r5,r8
+ adc.f DBL0L,DBL0L,r4
+ xor.f 0,DBL0H,DBL1H
+ asl r7,r7,r10
+ add_s r12,r12,r5
+ adc DBL0H,r12,r7
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+
+/* If one number is denormal, subtract some from the exponent of the other
+ one (if the other exponent is too small, return 0), and normalize the
+ denormal. Then re-run the computation. */
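
A hedged C sketch of the "normalize the denormal" half of that comment, operating on a double's 64-bit pattern. The helper and variable names are invented; the real code keeps the value split across DBL1H/DBL1L and folds the exponent adjustment into the other operand instead of tracking it separately:

    #include <stdint.h>
    #include <stdio.h>

    /* Shift a (nonzero) denormal fraction left until the implicit 1 appears,
       and account for the shift in the unbiased exponent.  */
    static void
    normalize_denorm (uint64_t bits, uint64_t *frac, int *exp)
    {
      uint64_t f = bits & ((1ull << 52) - 1); /* denormal: exponent field is 0 */
      int shift = 0;
      while (!(f & (1ull << 52)))
        {
          f <<= 1;
          shift++;
        }
      *frac = f;
      *exp = 1 - 1023 - shift;                /* denormals start at e = 1 - bias */
    }

    int
    main (void)
    {
      uint64_t frac;
      int exp;
      normalize_denorm (1, &frac, &exp);      /* smallest denormal, 2^-1074 */
      printf ("frac = 0x%llx  exp = %d\n", (unsigned long long) frac, exp);
      return 0;
    }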
+.Lret0_2:
+ lsr_s DBL0H,DBL0H,31
+ asl_s DBL0H,DBL0H,31
+ j_s.d [blink]
+ mov_s DBL0L,0
+ .balign 4
+.Ldenorm_dbl0:
+ mov_s r12,DBL0L
+ mov_s DBL0L,DBL1L
+ mov_s DBL1L,r12
+ mov_s r12,DBL0H
+ mov_s DBL0H,DBL1H
+ mov_s DBL1H,r12
+ and r11,DBL0H,r9
+.Ldenorm_dbl1:
+ brhs r11,r9,.Linf_nan
+ brhs 0x3ca00001,r11,.Lret0
+ sub_s DBL0H,DBL0H,DBL1H
+ bmsk.f DBL1H,DBL1H,30
+ add_s DBL0H,DBL0H,DBL1H
+ beq.d .Ldenorm_2
+ norm r12,DBL1H
+ sub_s r12,r12,10
+ asl r5,r12,20
+ asl_s DBL1H,DBL1H,r12
+ sub DBL0H,DBL0H,r5
+ neg r5,r12
+ lsr r6,DBL1L,r5
+ asl_s DBL1L,DBL1L,r12
+ b.d __muldf3
+ add_s DBL1H,DBL1H,r6
+
+.Lret0: xor_s DBL0H,DBL0H,DBL1H
+ bclr DBL1H,DBL0H,31
+ xor_s DBL0H,DBL0H,DBL1H
+ j_s.d [blink]
+ mov_s DBL0L,0
+
+ .balign 4
+.Linf_nan:
+ bclr r12,DBL1H,31
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr_s DBL0H,DBL0H,31
+ max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
+ or.f 0,DBL0H,DBL0L
+ mov_s DBL0L,0
+ or.ne.f DBL1L,DBL1L,r12
+ not_s DBL0H,DBL0L ; inf * 0 -> NaN
+ mov.ne DBL0H,r8
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* We have checked for infinity / NaN input before, and transformed
+ denormalized inputs into normalized inputs. Thus, the worst case
+ exponent overflows are:
+ 1 + 1 - 0x400 == 0xc02 : maximum underflow
+ 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
+ N.B. 0x7e and 0x7f are also values for overflow.
+
+ If (r12 <= -54), we have an underflow to zero. */
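
For reference, the same exponent bookkeeping written out in C with the standard bias 0x3ff (the classification function and its thresholds are illustrative; the assembly keeps r12 in its own scaled representation, so the values above and the -54 cut-off only correspond to this sketch approximately):

    #include <stdio.h>

    /* Candidate biased exponent of a product of two normal doubles, before
       the normalization/rounding adjustment: e0 + e1 - 0x3ff.  */
    static const char *
    classify (int e0, int e1)
    {
      int e = e0 + e1 - 0x3ff;
      if (e >= 0x7ff)
        return "overflow (infinity)";
      if (e >= 1)
        return "normal";
      if (e > -54)
        return "denormal";
      return "underflow to zero";
    }

    int
    main (void)
    {
      printf ("%s\n", classify (1, 1));         /* two smallest normals */
      printf ("%s\n", classify (0x7fe, 0x7fe)); /* two largest normals  */
      printf ("%s\n", classify (0x400, 0x3ff)); /* 2.0 * 1.x            */
      return 0;
    }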
+ .balign 4
+.Linf_denorm:
+ lsr r6,r12,28
+ brlo.d r6,0xc,.Linf
+ asr r6,r12,20
+ add.f r10,r10,r6
+ brgt.d r10,0,.Lshift_frac
+ mov_s r12,0
+ beq.d .Lround_frac
+ add r10,r10,32
+.Lshift32_frac:
+ tst r4,r4
+ mov r4,r5
+ bset.ne r4,r4,1
+ mov r5,r7
+ brge.d r10,1,.Lshift_frac
+ mov r7,0
+ breq.d r10,0,.Lround_frac
+ add r10,r10,32
+ brgt r10,21,.Lshift32_frac
+ b_s .Lret0
+
+.Lround_frac:
+ add.f 0,r4,r4
+ btst.eq r5,0
+ mov_s DBL0L,r5
+ mov_s DBL0H,r7
+ adc.eq.f DBL0L,DBL0L,0
+ j_s.d [blink]
+ adc.eq DBL0H,DBL0H,0
+
+.Linf: mov_s DBL0L,0
+ xor.f DBL1H,DBL1H,DBL0H
+ mov_s DBL0H,r9
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+ ENDFUNC(__muldf3)
+
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S
new file mode 100644
index 00000000000..619662491b2
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S
@@ -0,0 +1,180 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
+ Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+ on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __mulsf3
+ FUNC(__mulsf3)
+ .balign 4
+__mulsf3:
+ push_s blink
+ push_s r1
+ bl.d __mulsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __mulsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ and r12,r0,r1
+ bic.f 0,0x7f800000,r12
+ bne 0f
+ bmsk.f 0,r0,22
+ bmsk.ne.f r1,r1,22
+ jne_s [blink] ; both NaN -> OK
+0: bl abort
+ ENDFUNC(__mulsf3)
+#define __mulsf3 __mulsf3_asm
+#endif /* DEBUG */
+
+ .balign 4
+ .global __mulsf3
+ FUNC(__mulsf3)
+__mulsf3:
+ ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
+ bmsk r4,r1,22
+ bset r2,r0,23
+ asl_s r2,r2,8
+ bset r3,r4,23
+ mulu64 r2,r3
+ and r11,r0,r9
+ breq.d r11,0,.Ldenorm_dbl0
+ and r12,r1,r9
+ breq.d r12,0,.Ldenorm_dbl1
+ xor_s r0,r0,r1
+ breq.d r11,r9,.Linf_nan_dbl0
+ ld.as r4,[pcl,70]; [pcl,((.L7fffffff-.+2)/4)]
+ breq.d r12,r9,.Linf_nan_dbl1
+.Lpast_denorm:
+ asl.f 0,mhi,8
+ mov r6,mhi
+ mov r7,mlo
+ add.pl r6,r6,r6
+ bclr.pl r6,r6,23
+ add.pl.f r7,r7,r7
+ add.cs r6,r6,1
+ lsr.f 0,r6,1
+ add_s r12,r12,r11
+ adc.f 0,r7,r4
+ add_s r12,r12, \
+ -0x3f800000
+ adc.f r8,r6,r12
+ tst.pl r8,r9
+ bic r0,r0,r4
+ min r3,r8,r9
+ jpnz.d [blink]
+ add.pnz r0,r0,r3
+; infinity or denormal number
+ add.ne.f r3,r3,r3
+ asr_s r3,r3,23+1
+ bset r6,r6,23
+ bpnz.d .Linfinity
+ sub_s r3,r3,1
+ neg_s r2,r3
+ brhi.d r2,24,.Lret_r0 ; right shift > 24 -> return +-0
+ lsr r2,r6,r2
+ asl r9,r6,r3
+ lsr.f 0,r2,1
+ tst r7,r7
+ add_s r0,r0,r2
+ bset.ne r9,r9,0
+ adc.f 0,r9,r4
+ j_s.d [blink]
+ add.cs r0,r0,1
+.Linfinity:
+ j_s.d [blink]
+ add_s r0,r0,r9
+
+.Lret_r0: j_s [blink]
+
+ .balign 4
+.Ldenorm_dbl0:
+ bclr_s r2,r2,31
+ norm.f r4,r2
+ add_s r2,r2,r2
+ asl r2,r2,r4
+ mulu64 r2,r3
+ breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
+ asl r4,r4,23
+ sub.ne.f r12,r12,r4
+ ld.as r4,[pcl,29]; [pcl,((.L7fffffff-.+2)/4)]
+ bhi.d .Lpast_denorm
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+
+ .balign 4
+.Ldenorm_dbl0_inf_nan_dbl1:
+ bmsk.f 0,r0,30
+ beq_s .Lretnan
+ xor_s r0,r0,r1
+.Linf_nan_dbl1:
+ xor_s r1,r1,r0
+.Linf_nan_dbl0:
+ bclr_s r1,r1,31
+ cmp_s r1,r9
+ jls.d [blink]
+ xor_s r0,r0,r1
+; r1 NaN -> result NaN
+.Lretnan:
+ j_s.d [blink]
+ mov r0,-1
+
+ .balign 4
+.Ldenorm_dbl1:
+ breq.d r11,r9,.Linf_nan_dbl0_2
+ norm.f r3,r4
+ sub_s r3,r3,7
+ asl r4,r4,r3
+ mulu64 r2,r4
+ sub_s r3,r3,1
+ asl_s r3,r3,23
+ sub.ne.f r11,r11,r3
+ ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
+ bhi.d .Lpast_denorm
+ bmsk r8,r0,30
+ j_s.d [blink]
+ bic r0,r0,r8
+
+ .balign 4
+.Linf_nan_dbl0_2:
+ bclr_s r1,r1,31
+ xor_s r0,r0,r1
+ sub.eq r1,r1,1 ; inf/nan * 0 -> nan
+ bic.f 0,r9,r1
+ j_s.d [blink]
+ or.eq r0,r0,r1 ; r1 nan -> result nan
+
+ .balign 4
+.L7f800000:
+ .long 0x7f800000
+.L7fffffff:
+ .long 0x7fffffff
+ ENDFUNC(__mulsf3)