4 files changed, 1098 insertions, 0 deletions
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S
new file mode 100644
index 00000000000..0584b53f77b
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/divdf3.S
@@ -0,0 +1,410 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.  
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+		on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/*
+   to calculate a := b/x as b*y, with y := 1/x:
+   - x is in the range [1..2)
+   - calculate 15..18 bit inverse y0 using a table of approximating polynoms.
+     Precision is higher for polynoms used to evaluate input with larger
+     value.
+   - Do one newton-raphson iteration step to double the precision,
+     then multiply this with the divisor
+	-> more time to decide if dividend is subnormal
+     - the worst error propagation is on the side of the value range
+       with the least initial defect, thus giving us about 30 bits precision.
+      The truncation error for the either is less than 1 + x/2 ulp.
+      A 31 bit inverse can be simply calculated by using x with implicit 1
+      and chaining the multiplies.  For a 32 bit inverse, we multiply y0^2
+      with the bare fraction part of x, then add in y0^2 for the implicit
+      1 of x.
+    - If calculating a 31 bit inverse, the systematic error is less than
+      -1 ulp; likewise, for 32 bit, it is less than -2 ulp.
+    - If we calculate our seed with a 32 bit fraction, we can archive a
+      tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
+      only need to take the step to calculate the 2nd stage rest and
+      rounding adjust 1/32th of the time.  However, if we use a 20 bit
+      fraction for the seed, the negative error can exceed -2 ulp/128, (2)
+      thus for a simple add / tst check, we need to do the 2nd stage
+      rest calculation/ rounding adjust 1/16th of the time.
+      (1): The inexactness of the 32 bit inverse contributes an error in the
+      range of (-1 .. +(1+x/2) ) ulp/128.  Leaving out the low word of the
+      rest contributes an error < +1/x ulp/128 .  In the interval [1,2),
+      x/2 + 1/x <= 1.5 .
+      (2): Unless proven otherwise.  I have not actually looked for an
+      example where -2 ulp/128 is exceeded, and my calculations indicate
+      that the excess, if existent, is less than -1/512 ulp.
+    ??? The algorithm is still based on the ARC700 optimized code.
+    Maybe we could make better use of 64 bit multiply results and/or mmed .
+ */
+#include "../arc-ieee-754.h"
+
+/* N.B. fp-bit.c does double rounding on denormal numbers.  */
+#if 0 /* DEBUG */
+	.global __divdf3
+	FUNC(__divdf3)
+	.balign 4
+__divdf3:
+	push_s blink
+	push_s r2
+	push_s r3
+	push_s r0
+	bl.d __divdf3_c
+	push_s r1
+	ld_s r2,[sp,12]
+	ld_s r3,[sp,8]
+	st_s r0,[sp,12]
+	st_s r1,[sp,8]
+	pop_s r1
+	bl.d __divdf3_asm
+	pop_s r0
+	pop_s r3
+	pop_s r2
+	pop_s blink
+	cmp r0,r2
+	cmp.eq r1,r3
+	jeq_s [blink]
+	and r12,DBL0H,DBL1H
+	bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+	jeq_s [blink]
+	bl abort
+	ENDFUNC(__divdf3)
+#define __divdf3 __divdf3_asm
+#endif /* DEBUG */
+
+	FUNC(__divdf3)
+	.balign 4
+.L7ff00000:
+	.long 0x7ff00000
+.Ldivtab:
+	.long 0xfc0fffe1
+	.long 0xf46ffdfb
+	.long 0xed1ffa54
+	.long 0xe61ff515
+	.long 0xdf7fee75
+	.long 0xd91fe680
+	.long 0xd2ffdd52
+	.long 0xcd1fd30c
+	.long 0xc77fc7cd
+	.long 0xc21fbbb6
+	.long 0xbcefaec0
+	.long 0xb7efa100
+	.long 0xb32f92bf
+	.long 0xae8f83b7
+	.long 0xaa2f7467
+	.long 0xa5ef6479
+	.long 0xa1cf53fa
+	.long 0x9ddf433e
+	.long 0x9a0f3216
+	.long 0x965f2091
+	.long 0x92df0f11
+	.long 0x8f6efd05
+	.long 0x8c1eeacc
+	.long 0x88eed876
+	.long 0x85dec615
+	.long 0x82eeb3b9
+	.long 0x800ea10b
+	.long 0x7d3e8e0f
+	.long 0x7a8e7b3f
+	.long 0x77ee6836
+	.long 0x756e5576
+	.long 0x72fe4293
+	.long 0x709e2f93
+	.long 0x6e4e1c7f
+	.long 0x6c0e095e
+	.long 0x69edf6c5
+	.long 0x67cde3a5
+	.long 0x65cdd125
+	.long 0x63cdbe25
+	.long 0x61ddab3f
+	.long 0x600d991f
+	.long 0x5e3d868c
+	.long 0x5c6d7384
+	.long 0x5abd615f
+	.long 0x590d4ecd
+	.long 0x576d3c83
+	.long 0x55dd2a89
+	.long 0x545d18e9
+	.long 0x52dd06e9
+	.long 0x516cf54e
+	.long 0x4ffce356
+	.long 0x4e9cd1ce
+	.long 0x4d3cbfec
+	.long 0x4becae86
+	.long 0x4aac9da4
+	.long 0x496c8c73
+	.long 0x483c7bd3
+	.long 0x470c6ae8
+	.long 0x45dc59af
+	.long 0x44bc4915
+	.long 0x43ac3924
+	.long 0x428c27fb
+	.long 0x418c187a
+	.long 0x407c07bd
+
+__divdf3_support: /* This label makes debugger output saner.  */
+	.balign 4
+.Ldenorm_dbl1:
+	brge r6, \
+		0x43500000,.Linf_NaN ; large number / denorm -> Inf
+	bmsk.f r12,DBL1H,19
+	mov.eq r12,DBL1L
+	mov.eq DBL1L,0
+	sub.eq r7,r7,32
+	norm.f r11,r12 ; flag for x/0 -> Inf check
+	beq_s .Linf_NaN
+	mov.mi r11,0
+	add.pl r11,r11,1
+	add_s r12,r12,r12
+	asl r8,r12,r11
+	rsub r12,r11,31
+	lsr r12,DBL1L,r12
+	tst_s DBL1H,DBL1H
+	or r8,r8,r12
+	lsr r4,r8,26
+	lsr DBL1H,r8,12
+	ld.as r4,[r10,r4]
+	bxor.mi DBL1H,DBL1H,31
+	sub r11,r11,11
+	asl DBL1L,DBL1L,r11
+	sub r11,r11,1
+	mulu64 r4,r8
+	sub r7,r7,r11
+	b.d .Lpast_denorm_dbl1
+	asl r7,r7,20
+
+	.balign 4
+.Ldenorm_dbl0:
+	bmsk.f r12,DBL0H,19
+	; wb stall
+	mov.eq r12,DBL0L
+	sub.eq r6,r6,32
+	norm.f r11,r12 ; flag for 0/x -> 0 check
+	brge r7, \
+		0x43500000, .Lret0_2 ; denorm/large number -> 0
+	beq_s .Lret0_2
+	mov.mi r11,0
+	add.pl r11,r11,1
+	asl r12,r12,r11
+	sub r6,r6,r11
+	add.f 0,r6,31
+	lsr r10,DBL0L,r6
+	mov.mi r10,0
+	add r6,r6,11+32
+	neg.f r11,r6
+	asl DBL0L,DBL0L,r11
+	mov.pl DBL0L,0
+	sub r6,r6,32-1
+	b.d .Lpast_denorm_dbl0
+	asl r6,r6,20
+
+.Linf_NaN:
+	tst_s DBL0L,DBL0L ; 0/0 -> NaN
+	xor_s DBL1H,DBL1H,DBL0H
+	bclr.eq.f DBL0H,DBL0H,31
+	bmsk DBL0H,DBL1H,30
+	xor_s DBL0H,DBL0H,DBL1H
+	sub.eq DBL0H,DBL0H,1
+	mov_s DBL0L,0
+	j_s.d [blink]
+	or DBL0H,DBL0H,r9
+	.balign 4
+.Lret0_2:
+	xor_s DBL1H,DBL1H,DBL0H
+	mov_s DBL0L,0
+	bmsk DBL0H,DBL1H,30
+	j_s.d [blink]
+	xor_s DBL0H,DBL0H,DBL1H
+	.balign 4
+	.global __divdf3
+/* N.B. the spacing between divtab and the sub3 to get its address must
+   be a multiple of 8.  */
+__divdf3:
+	asl r8,DBL1H,12
+	lsr r4,r8,26
+	sub3 r10,pcl,61; (.-.Ldivtab) >> 3
+	ld.as r9,[pcl,-124]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+	ld.as r4,[r10,r4]
+	lsr r12,DBL1L,20
+	and.f r7,DBL1H,r9
+	or r8,r8,r12
+	mulu64 r4,r8
+	beq.d .Ldenorm_dbl1
+.Lpast_denorm_dbl1:
+	and.f r6,DBL0H,r9
+	breq.d r7,r9,.Linf_nan_dbl1
+	asl r4,r4,12
+	sub r4,r4,mhi
+	mulu64 r4,r4
+	beq.d .Ldenorm_dbl0
+	lsr r8,r8,1
+	breq.d r6,r9,.Linf_nan_dbl0
+	asl r12,DBL0H,11
+	lsr r10,DBL0L,21
+.Lpast_denorm_dbl0:
+	bset r8,r8,31
+	mulu64 mhi,r8
+	add_s r12,r12,r10
+	bset r5,r12,31
+	cmp r5,r8
+	cmp.eq DBL0L,DBL1L
+	lsr.cc r5,r5,1
+	sub r4,r4,mhi ; u1.31 inverse, about 30 bit
+	mulu64 r5,r4 ; result fraction highpart
+	lsr r8,r8,2 ; u3.29
+	add r5,r6, /* wait for immediate */ \
+		0x3fe00000
+	mov r11,mhi ; result fraction highpart
+	mulu64 r11,r8 ; u-28.31
+	asl_s DBL1L,DBL1L,9 ; u-29.23:9
+	sbc r6,r5,r7
+	mov r12,mlo ; u-28.31
+	mulu64 r11,DBL1L ; mhi: u-28.23:9
+	add.cs DBL0L,DBL0L,DBL0L
+	asl_s DBL0L,DBL0L,6 ; u-26.25:7
+	asl r10,r11,23
+	sub_l DBL0L,DBL0L,r12
+	lsr r7,r11,9
+	sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
+	mul64 r5,r4 ; mhi: result fraction lowpart
+	xor.f 0,DBL0H,DBL1H
+	and DBL0H,r6,r9
+	add_s DBL0H,DBL0H,r7
+	bclr r12,r9,20 ; 0x7fe00000
+	brhs.d r6,r12,.Linf_denorm
+	bxor.mi DBL0H,DBL0H,31
+	add.f r12,mhi,0x11
+	asr r9,r12,5
+	sub.mi DBL0H,DBL0H,1
+	add.f DBL0L,r9,r10
+	tst r12,0x1c
+	jne.d [blink]
+	add.cs DBL0H,DBL0H,1
+        /* work out exact rounding if we fall through here.  */
+        /* We know that the exact result cannot be represented in double
+           precision.  Find the mid-point between the two nearest
+           representable values, multiply with the divisor, and check if
+           the result is larger than the dividend.  Since we want to know
+	   only the sign bit, it is sufficient to calculate only the
+	   highpart of the lower 64 bits.  */
+	mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
+	sub.f DBL0L,DBL0L,1
+	asl r12,r9,2 ; u-22.30:2
+	sub.cs DBL0H,DBL0H,1
+	sub.f r12,r12,2
+	mov r10,mlo ; rest before considering r12 in r5 : -r10
+	mulu64 r12,DBL1L ; mhi: u-51.32
+	asl r5,r5,25 ; s-51.7:25
+	lsr r10,r10,7 ; u-51.30:2
+	mov r7,mhi ; u-51.32
+	mulu64 r12,r8 ; mlo: u-51.31:1
+	sub r5,r5,r10
+	add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
+	bset r7,r7,0 ; make sure that the result is not zero, and that
+	sub r5,r5,r7 ; a highpart zero appears negative
+	sub.f r5,r5,mlo ; rest msw
+	add.pl.f DBL0L,DBL0L,1
+	j_s.d [blink]
+	add.eq DBL0H,DBL0H,1
+
+.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
+	or.f 0,r6,DBL0L
+	cmp.ne r6,r9
+	not_s DBL0L,DBL1H
+	sub_s.ne DBL0L,DBL0L,DBL0L
+	tst_s DBL0H,DBL0H
+	add_s DBL0H,DBL1H,DBL0L
+	j_s.d [blink]
+	bxor.mi DBL0H,DBL0H,31
+.Linf_nan_dbl0:
+	tst_s DBL1H,DBL1H
+	j_s.d [blink]
+	bxor.mi DBL0H,DBL0H,31
+	.balign 4
+.Linf_denorm:
+	lsr r12,r6,28
+	brlo.d r12,0xc,.Linf
+.Ldenorm:
+	asr r6,r6,20
+	neg r9,r6
+	mov_s DBL0H,0
+	brhs.d r9,54,.Lret0
+	bxor.mi DBL0H,DBL0H,31
+	add r12,mhi,1
+	and r12,r12,-4
+	rsub r7,r6,5
+	asr r10,r12,28
+	bmsk r4,r12,27
+	min r7,r7,31
+	asr DBL0L,r4,r7
+	add DBL1H,r11,r10
+	abs.f r10,r4
+	sub.mi r10,r10,1
+	add.f r7,r6,32-5
+	asl r4,r4,r7
+	mov.mi r4,r10
+	add.f r10,r6,23
+	rsub r7,r6,9
+	lsr r7,DBL1H,r7
+	asl r10,DBL1H,r10
+	or.pnz DBL0H,DBL0H,r7
+	or.mi r4,r4,r10
+	mov.mi r10,r7
+	add.f DBL0L,r10,DBL0L
+	add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
+	bxor.f 0,r4,31
+	add.pnz.f DBL0L,DBL0L,1
+	add.cs.f DBL0H,DBL0H,1
+	jne_s [blink]
+	/* Calculation so far was not conclusive; calculate further rest.  */
+	mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
+	asr.f r12,r12,3
+	asl r5,r5,25 ; s-51.7:25
+	mov r11,mlo ; rest before considering r12 in r5 : -r11
+	mulu64 r12,r8 ; u-51.31:1
+	and r9,DBL0L,1 ; tie-breaker: round to even
+	lsr r11,r11,7 ; u-51.30:2
+	mov DBL1H,mlo ; u-51.31:1
+	mulu64 r12,DBL1L ; u-51.62:2
+	sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
+	add_s DBL1H,DBL1H,r11
+	sub DBL1H,DBL1H,r5 ; -rest msw
+	add_s DBL1H,DBL1H,mhi ; -rest msw
+	add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
+	tst_s DBL1H,DBL1H
+	cmp.eq mlo,r9
+	add.cs.f DBL0L,DBL0L,1
+	j_s.d [blink]
+	add.cs DBL0H,DBL0H,1
+
+.Lret0:
+	/* return +- 0 */
+	j_s.d [blink]
+	mov_s DBL0L,0
+.Linf:
+	mov_s DBL0H,r9
+	mov_s DBL0L,0
+	j_s.d [blink]
+	bxor.mi DBL0H,DBL0H,31
+	ENDFUNC(__divdf3)
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
new file mode 100644
index 00000000000..a9934f0ad94
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/divsf3.S
@@ -0,0 +1,274 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.  
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+		on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/*
+   - calculate 15..18 bit inverse using a table of approximating polynoms.
+     precision is higher for polynoms used to evaluate input with larger
+     value.
+   - do one newton-raphson iteration step to double the precision,
+     then multiply this with the divisor
+	-> more time to decide if dividend is subnormal
+     - the worst error propagation is on the side of the value range
+       with the least initial defect, thus giving us about 30 bits precision.
+ */
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+	.global __divsf3
+	FUNC(__divsf3)
+	.balign 4
+__divsf3:
+	push_s blink
+	push_s r1
+	bl.d __divsf3_c
+	push_s r0
+	ld_s r1,[sp,4]
+	st_s r0,[sp,4]
+	bl.d __divsf3_asm
+	pop_s r0
+	pop_s r1
+	pop_s blink
+	cmp r0,r1
+#if 1
+	bne abort
+	jeq_s [blink]
+	b abort
+#else
+	bne abort
+	j_s [blink]
+#endif
+	ENDFUNC(__divsf3)
+#define __divsf3 __divsf3_asm
+#endif /* DEBUG */
+
+	FUNC(__divsf3)
+	.balign 4
+.Ldivtab:
+	.long 0xfc0ffff0
+	.long 0xf46ffefd
+	.long 0xed1ffd2a
+	.long 0xe627fa8e
+	.long 0xdf7ff73b
+	.long 0xd917f33b
+	.long 0xd2f7eea3
+	.long 0xcd1fe986
+	.long 0xc77fe3e7
+	.long 0xc21fdddb
+	.long 0xbcefd760
+	.long 0xb7f7d08c
+	.long 0xb32fc960
+	.long 0xae97c1ea
+	.long 0xaa27ba26
+	.long 0xa5e7b22e
+	.long 0xa1cfa9fe
+	.long 0x9ddfa1a0
+	.long 0x9a0f990c
+	.long 0x9667905d
+	.long 0x92df878a
+	.long 0x8f6f7e84
+	.long 0x8c27757e
+	.long 0x88f76c54
+	.long 0x85df630c
+	.long 0x82e759c5
+	.long 0x8007506d
+	.long 0x7d3f470a
+	.long 0x7a8f3da2
+	.long 0x77ef341e
+	.long 0x756f2abe
+	.long 0x72f7212d
+	.long 0x709717ad
+	.long 0x6e4f0e44
+	.long 0x6c1704d6
+	.long 0x69e6fb44
+	.long 0x67cef1d7
+	.long 0x65c6e872
+	.long 0x63cedf18
+	.long 0x61e6d5cd
+	.long 0x6006cc6d
+	.long 0x5e36c323
+	.long 0x5c76b9f3
+	.long 0x5abeb0b7
+	.long 0x5916a79b
+	.long 0x57769e77
+	.long 0x55de954d
+	.long 0x54568c4e
+	.long 0x52d6834d
+	.long 0x51667a7f
+	.long 0x4ffe71b5
+	.long 0x4e9e68f1
+	.long 0x4d466035
+	.long 0x4bf65784
+	.long 0x4aae4ede
+	.long 0x496e4646
+	.long 0x48363dbd
+	.long 0x47063547
+	.long 0x45de2ce5
+	.long 0x44be2498
+	.long 0x43a61c64
+	.long 0x4296144a
+	.long 0x41860c0e
+	.long 0x407e03ee
+.L7f800000:
+	.long 0x7f800000
+	.balign 4
+	.global __divsf3_support
+__divsf3_support:
+.Linf_NaN:
+	bclr.f 0,r0,31 ; 0/0 -> NaN
+	xor_s r0,r0,r1
+	bmsk r1,r0,30
+	bic_s r0,r0,r1
+	sub.eq r0,r0,1
+	j_s.d [blink]
+	or r0,r0,r9
+.Lret0:
+	xor_s r0,r0,r1
+	bmsk r1,r0,30
+	j_s.d [blink]
+	bic_s r0,r0,r1
+/* N.B. the spacing between divtab and the sub3 to get its address must
+   be a multiple of 8.  */
+__divsf3:
+	lsr r2,r1,17
+	sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
+	bmsk_s r2,r2,5
+	ld.as r5,[r3,r2]
+	asl r4,r1,9
+	ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
+	mulu64 r5,r4
+	and.f r11,r1,r9
+	asl r6,r1,8
+	bset r6,r6,31
+	beq.d .Ldenorm_fp1
+	asl r5,r5,13
+	breq.d r11,r9,.Linf_nan_fp1
+	and.f r2,r0,r9
+	sub r7,r5,mhi
+	mulu64 r7,r6
+	beq.d .Ldenorm_fp0
+	asl r12,r0,8
+	breq.d r2,r9,.Linf_nan_fp0
+	mulu64 mhi,r7
+.Lpast_denorm_fp1:
+	bset r3,r12,31
+.Lpast_denorm_fp0:
+	cmp_s r3,r6
+	lsr.cc r3,r3,1
+	add_s r2,r2, /* wait for immediate */ \
+		0x3f000000
+	sub r7,r7,mhi ; u1.31 inverse, about 30 bit
+	mulu64 r3,r7
+	sbc r2,r2,r11
+	xor.f 0,r0,r1
+	and r0,r2,r9
+	bclr r3,r9,23 ; 0x7f000000
+	brhs.d r2,r3,.Linf_denorm
+	bxor.mi r0,r0,31
+.Lpast_denorm:
+	add r3,mhi,0x22 ; round to nearest or higher
+	tst r3,0x3c ; check if rounding was unsafe
+	lsr r3,r3,6
+	jne.d [blink] ; return if rounding was safe.
+	add_s r0,r0,r3
+        /* work out exact rounding if we fall through here.  */
+        /* We know that the exact result cannot be represented in single
+           precision.  Find the mid-point between the two nearest
+           representable values, multiply with the divisor, and check if
+           the result is larger than the dividend.  */
+        add_s r3,r3,r3
+        sub_s r3,r3,1
+        mulu64 r3,r6
+	asr.f 0,r0,1 ; for round-to-even in case this is a denorm
+	rsub r2,r9,25
+        asl_s r12,r12,r2
+        sub.f 0,r12,mlo
+        j_s.d [blink]
+        sub.mi r0,r0,1
+.Linf_nan_fp1:
+	lsr_s r0,r0,31
+	bmsk.f 0,r1,22
+	asl_s r0,r0,31
+	bne_s 0f ; inf/inf -> nan
+	brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
+0:	j_s.d [blink]
+	mov r0,-1
+.Lsigned0:
+.Linf_nan_fp0:
+	tst_s r1,r1
+	j_s.d [blink]
+	bxor.mi r0,r0,31
+	.balign 4
+	.global __divsf3
+/* For denormal results, it is possible that an exact result needs
+   rounding, and thus the round-to-even rule has to come into play.  */
+.Linf_denorm:
+	brlo r2,0xc0000000,.Linf
+.Ldenorm:
+	asr_s r2,r2,23
+	bic r0,r0,r9
+	neg r9,r2
+	brlo.d r9,25,.Lpast_denorm
+	lsr r3,mlo,r9
+	/* Fall through: return +- 0 */
+	j_s [blink]
+.Linf:
+	j_s.d [blink]
+	or r0,r0,r9
+	.balign 4
+.Ldenorm_fp1:
+	bclr r6,r6,31
+	norm.f r12,r6 ; flag for x/0 -> Inf check
+	add r6,r6,r6
+	rsub r5,r12,16
+	ror r5,r1,r5
+	asl r6,r6,r12
+	bmsk r5,r5,5
+	ld.as r5,[r3,r5]
+	add r4,r6,r6
+	; load latency
+	mulu64 r5,r4
+	bic.ne.f 0, \
+		0x60000000,r0 ; large number / denorm -> Inf
+	asl r5,r5,13
+	sub r7,r5,mhi
+	beq.d .Linf_NaN
+	mulu64 r7,r6
+	asl_s r12,r12,23
+	and.f r2,r0,r9
+	add_s r2,r2,r12
+	asl r12,r0,8
+	bne.d .Lpast_denorm_fp1
+.Ldenorm_fp0: mulu64 mhi,r7
+	bclr r12,r12,31
+	norm.f r3,r12 ; flag for 0/x -> 0 check
+	bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
+	beq_s .Lret0
+	asl_s r12,r12,r3
+	asl_s r3,r3,23
+	add_s r12,r12,r12
+	add r11,r11,r3
+	b.d .Lpast_denorm_fp0
+	mov_s r3,r12
+	ENDFUNC(__divsf3)
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S
new file mode 100644
index 00000000000..1e18999e49a
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/muldf3.S
@@ -0,0 +1,234 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.  
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+		on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+	.global __muldf3
+	.balign 4
+__muldf3:
+	push_s blink
+	push_s r2
+	push_s r3
+	push_s r0
+	bl.d __muldf3_c
+	push_s r1
+	ld_s r2,[sp,12]
+	ld_s r3,[sp,8]
+	st_s r0,[sp,12]
+	st_s r1,[sp,8]
+	pop_s r1
+	bl.d __muldf3_asm
+	pop_s r0
+	pop_s r3
+	pop_s r2
+	pop_s blink
+	cmp r0,r2
+	cmp.eq r1,r3
+	jeq_s [blink]
+	and r12,DBL0H,DBL1H
+	bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+	jeq_s [blink]
+	b abort
+#define __muldf3 __muldf3_asm
+#endif /* DEBUG */
+
+__muldf3_support: /* This label makes debugger output saner.  */
+	.balign 4
+	FUNC(__muldf3)
+.Ldenorm_2:
+	breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
+	norm.f r12,DBL1L
+	mov.mi r12,21
+	add.pl r12,r12,22
+	neg r11,r12
+	asl_s r12,r12,20
+	lsr.f DBL1H,DBL1L,r11
+	ror DBL1L,DBL1L,r11
+	sub_s DBL0H,DBL0H,r12
+	mov.eq DBL1H,DBL1L
+	sub_l DBL1L,DBL1L,DBL1H
+	/* Fall through.  */
+	.global __muldf3
+	.balign 4
+__muldf3:
+	mulu64 DBL0L,DBL1L
+	ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)]
+	bmsk r6,DBL0H,19
+	bset r6,r6,20
+	and r11,DBL0H,r9
+	breq.d r11,0,.Ldenorm_dbl0
+	and r12,DBL1H,r9
+	breq.d r12,0,.Ldenorm_dbl1
+	mov r8,mlo
+	mov r4,mhi
+	mulu64 r6,DBL1L
+	breq.d r11,r9,.Linf_nan
+	bmsk r10,DBL1H,19
+	breq.d r12,r9,.Linf_nan
+	bset r10,r10,20
+	add.f r4,r4,mlo
+	adc r5,mhi,0
+	mulu64 r10,DBL0L
+	add_s r12,r12,r11 ; add exponents
+	add.f r4,r4,mlo
+	adc r5,r5,mhi
+	mulu64 r6,r10
+	tst r8,r8
+	bclr r8,r9,30 ; 0x3ff00000
+	bset.ne r4,r4,0 ; put least significant word into sticky bit
+	bclr r6,r9,20 ; 0x7fe00000
+	add.f r5,r5,mlo
+	adc r7,mhi,0 ; fraction product in r7:r5:r4
+	lsr.f r10,r7,9
+	rsub.eq r8,r8,r9 ; 0x40000000
+	sub r12,r12,r8 ; subtract bias + implicit 1
+	brhs.d r12,r6,.Linf_denorm
+	rsub r10,r10,12
+.Lshift_frac:
+	neg r8,r10
+	asl r6,r4,r10
+	lsr DBL0L,r4,r8
+	add.f 0,r6,r6
+	btst.eq DBL0L,0
+	cmp.eq r4,r4 ; round to nearest / round to even
+	asl r4,r5,r10
+	lsr r5,r5,r8
+	adc.f DBL0L,DBL0L,r4
+	xor.f 0,DBL0H,DBL1H
+	asl r7,r7,r10
+	add_s r12,r12,r5
+	adc DBL0H,r12,r7
+	j_s.d [blink]
+	bset.mi DBL0H,DBL0H,31
+
+/* N.B. This is optimized for ARC700.
+  ARC600 has very different scheduling / instruction selection criteria.  */
+
+/* If one number is denormal, subtract some from the exponent of the other
+   one (if the other exponent is too small, return 0), and normalize the
+   denormal.  Then re-run the computation.  */
+.Lret0_2:
+	lsr_s DBL0H,DBL0H,31
+	asl_s DBL0H,DBL0H,31
+	j_s.d [blink]
+	mov_s DBL0L,0
+	.balign 4
+.Ldenorm_dbl0:
+	mov_s r12,DBL0L
+	mov_s DBL0L,DBL1L
+	mov_s DBL1L,r12
+	mov_s r12,DBL0H
+	mov_s DBL0H,DBL1H
+	mov_s DBL1H,r12
+	and r11,DBL0H,r9
+.Ldenorm_dbl1:
+	brhs r11,r9,.Linf_nan
+	brhs 0x3ca00001,r11,.Lret0
+	sub_s DBL0H,DBL0H,DBL1H
+	bmsk.f DBL1H,DBL1H,30
+	add_s DBL0H,DBL0H,DBL1H
+	beq.d .Ldenorm_2
+	norm r12,DBL1H
+	sub_s r12,r12,10
+	asl r5,r12,20
+	asl_s DBL1H,DBL1H,r12
+	sub DBL0H,DBL0H,r5
+	neg r5,r12
+	lsr r6,DBL1L,r5
+	asl_s DBL1L,DBL1L,r12
+	b.d __muldf3
+	add_s DBL1H,DBL1H,r6
+
+.Lret0:	xor_s DBL0H,DBL0H,DBL1H
+	bclr DBL1H,DBL0H,31
+	xor_s DBL0H,DBL0H,DBL1H
+	j_s.d [blink]
+	mov_s DBL0L,0
+
+	.balign 4
+.Linf_nan:
+	bclr r12,DBL1H,31
+	xor_s DBL1H,DBL1H,DBL0H
+	bclr_s DBL0H,DBL0H,31
+	max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
+	or.f 0,DBL0H,DBL0L
+	mov_s DBL0L,0
+	or.ne.f DBL1L,DBL1L,r12
+	not_s DBL0H,DBL0L ; inf * 0 -> NaN
+	mov.ne DBL0H,r8
+	tst_s DBL1H,DBL1H
+	j_s.d [blink]
+	bset.mi DBL0H,DBL0H,31
+
+/* We have checked for infinity / NaN input before, and transformed
+   denormalized inputs into normalized inputs.  Thus, the worst case
+   exponent overflows are:
+       1 +     1 - 0x400 == 0xc02 : maximum underflow
+   0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
+   N.B. 0x7e and 0x7f are also values for overflow.
+
+   If (r12 <= -54), we have an underflow to zero.  */
+	.balign 4
+.Linf_denorm:
+	lsr r6,r12,28
+	brlo.d r6,0xc,.Linf
+	asr r6,r12,20
+	add.f r10,r10,r6
+	brgt.d r10,0,.Lshift_frac
+	mov_s r12,0
+	beq.d .Lround_frac
+	add r10,r10,32
+.Lshift32_frac:
+	tst r4,r4
+	mov r4,r5
+	bset.ne r4,r4,1
+	mov r5,r7
+	brge.d r10,1,.Lshift_frac
+	mov r7,0
+	breq.d r10,0,.Lround_frac
+	add r10,r10,32
+	brgt r10,21,.Lshift32_frac
+	b_s .Lret0
+
+.Lround_frac:
+	add.f 0,r4,r4
+	btst.eq r5,0
+	mov_s DBL0L,r5
+	mov_s DBL0H,r7
+	adc.eq.f DBL0L,DBL0L,0
+	j_s.d [blink]
+	adc.eq DBL0H,DBL0H,0
+
+.Linf:	mov_s DBL0L,0
+	xor.f DBL1H,DBL1H,DBL0H
+	mov_s DBL0H,r9
+	j_s.d [blink]
+	bset.mi DBL0H,DBL0H,31
+	ENDFUNC(__muldf3)
+
+	.balign 4
+.L7ff00000:
+	.long 0x7ff00000
diff --git a/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S b/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S
new file mode 100644
index 00000000000..619662491b2
--- /dev/null
+++ b/libgcc/config/arc/ieee-754/arc600-mul64/mulsf3.S
@@ -0,0 +1,180 @@
+/* Copyright (C) 2008-2013 Free Software Foundation, Inc.  
+   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
+		on behalf of Synopsys Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+	.global __mulsf3
+	FUNC(__mulsf3)
+	.balign 4
+__mulsf3:
+	push_s blink
+	push_s r1
+	bl.d __mulsf3_c
+	push_s r0
+	ld_s r1,[sp,4]
+	st_s r0,[sp,4]
+	bl.d __mulsf3_asm
+	pop_s r0
+	pop_s r1
+	pop_s blink
+	cmp r0,r1
+	jeq_s [blink]
+	and r12,r0,r1
+	bic.f 0,0x7f800000,r12
+	bne 0f
+	bmsk.f 0,r0,22
+	bmsk.ne.f r1,r1,22
+	jne_s [blink] ; both NaN -> OK
+0:	bl abort
+	ENDFUNC(__mulsf3)
+#define __mulsf3 __mulsf3_asm
+#endif /* DEBUG */
+
+	.balign	4
+	.global	__mulsf3
+	FUNC(__mulsf3)
+__mulsf3:
+	ld.as	r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
+	bmsk	r4,r1,22
+	bset	r2,r0,23
+	asl_s	r2,r2,8
+	bset	r3,r4,23
+	mulu64	r2,r3
+	and	r11,r0,r9
+	breq.d	r11,0,.Ldenorm_dbl0
+	and	r12,r1,r9
+	breq.d	r12,0,.Ldenorm_dbl1
+	xor_s	r0,r0,r1
+	breq.d	r11,r9,.Linf_nan_dbl0
+	ld.as	r4,[pcl,70]; [pcl,((.L7fffffff-.+2)/4)]
+	breq.d	r12,r9,.Linf_nan_dbl1
+.Lpast_denorm:
+	asl.f	0,mhi,8
+	mov	r6,mhi
+	mov	r7,mlo
+	add.pl	r6,r6,r6
+	bclr.pl	r6,r6,23
+	add.pl.f r7,r7,r7
+	add.cs	r6,r6,1
+	lsr.f	0,r6,1
+	add_s	r12,r12,r11
+	adc.f	0,r7,r4
+	add_s	r12,r12, \
+		-0x3f800000
+	adc.f	r8,r6,r12
+	tst.pl	r8,r9
+	bic	r0,r0,r4
+	min	r3,r8,r9
+	jpnz.d	[blink]
+	add.pnz	r0,r0,r3
+; infinity or denormal number
+	add.ne.f r3,r3,r3
+	asr_s	r3,r3,23+1
+	bset	r6,r6,23
+	bpnz.d	.Linfinity
+	sub_s	r3,r3,1
+	neg_s	r2,r3
+	brhi.d	r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
+	lsr	r2,r6,r2
+	asl	r9,r6,r3
+	lsr.f	0,r2,1
+	tst	r7,r7
+	add_s	r0,r0,r2
+	bset.ne	r9,r9,0
+	adc.f	0,r9,r4
+	j_s.d	[blink]
+	add.cs	r0,r0,1
+.Linfinity:
+	j_s.d	[blink]
+	add_s	r0,r0,r9
+
+.Lret_r0: j_s [blink]
+
+	.balign	4
+.Ldenorm_dbl0:
+	bclr_s	r2,r2,31
+	norm.f	r4,r2
+	add_s	r2,r2,r2
+	asl	r2,r2,r4
+	mulu64	r2,r3
+	breq.d	r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
+	asl	r4,r4,23
+	sub.ne.f r12,r12,r4
+	ld.as	r4,[pcl,29]; [pcl,((.L7fffffff-.+2)/4)]
+	bhi.d	.Lpast_denorm
+	xor_s	r0,r0,r1
+	bmsk	r1,r0,30
+	j_s.d	[blink]
+	bic_s	r0,r0,r1
+
+	.balign	4
+.Ldenorm_dbl0_inf_nan_dbl1:
+	bmsk.f	0,r0,30
+	beq_s	.Lretnan
+	xor_s	r0,r0,r1
+.Linf_nan_dbl1:
+	xor_s	r1,r1,r0
+.Linf_nan_dbl0:
+	bclr_s	r1,r1,31
+	cmp_s	r1,r9
+	jls.d	[blink]
+	xor_s	r0,r0,r1
+; r1 NaN -> result NaN
+.Lretnan:
+	j_s.d [blink]
+	mov r0,-1
+
+	.balign	4
+.Ldenorm_dbl1:
+	breq.d	r11,r9,.Linf_nan_dbl0_2
+	norm.f	r3,r4
+	sub_s	r3,r3,7
+	asl	r4,r4,r3
+	mulu64	r2,r4
+	sub_s	r3,r3,1
+	asl_s	r3,r3,23
+	sub.ne.f r11,r11,r3
+	ld.as	r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
+	bhi.d	.Lpast_denorm
+	bmsk	r8,r0,30
+	j_s.d	[blink]
+	bic	r0,r0,r8
+
+	.balign	4
+.Linf_nan_dbl0_2:
+	bclr_s	r1,r1,31
+	xor_s	r0,r0,r1
+	sub.eq	r1,r1,1 ; inf/nan * 0 -> nan
+	bic.f	0,r9,r1
+	j_s.d	[blink]
+	or.eq	r0,r0,r1 ; r1 nan -> result nan
+
+	.balign	4
+.L7f800000:
+	.long	0x7f800000
+.L7fffffff:
+	.long	0x7fffffff
+	ENDFUNC(__mulsf3)