Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/sh/crt1.asm             | 134
-rw-r--r--  gcc/config/sh/divcost-analysis     |   7
-rw-r--r--  gcc/config/sh/divtab-sh4-300.c     |  81
-rw-r--r--  gcc/config/sh/embed-elf.h          |   1
-rw-r--r--  gcc/config/sh/lib1funcs-4-300.asm  | 938
-rw-r--r--  gcc/config/sh/sh-protos.h          |   6
-rw-r--r--  gcc/config/sh/sh.c                 | 447
-rw-r--r--  gcc/config/sh/sh.h                 |  20
-rw-r--r--  gcc/config/sh/sh.md                | 212
-rw-r--r--  gcc/config/sh/sh.opt               |  57
-rw-r--r--  gcc/config/sh/sh1.md               |   6
-rw-r--r--  gcc/config/sh/sh4-300.md           | 288
-rw-r--r--  gcc/config/sh/sh4.md               |  28
-rw-r--r--  gcc/config/sh/sh4a.md              |  14
-rw-r--r--  gcc/config/sh/superh.h             |  10
-rw-r--r--  gcc/config/sh/t-sh                 |  17
16 files changed, 2037 insertions(+), 229 deletions(-)
diff --git a/gcc/config/sh/crt1.asm b/gcc/config/sh/crt1.asm
index c110fa07427..7aa684434d7 100644
--- a/gcc/config/sh/crt1.asm
+++ b/gcc/config/sh/crt1.asm
@@ -1,4 +1,5 @@
-/* Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2000, 2001, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
This file was pretty much copied from newlib.
This file is part of GCC.
@@ -894,25 +895,12 @@ ___main:
nop
#ifdef VBR_SETUP
! Exception handlers
- .balign 256
+ .section .text.vbr, "ax"
vbr_start:
- mov.l 2f, r0 ! load the old vbr setting (if any)
- mov.l @r0, r0
- cmp/eq #0, r0
- bf 1f
- ! no previous vbr - jump to own generic handler
- bra handler
- nop
-1: ! there was a previous handler - chain them
- jmp @r0
- nop
- .balign 4
-2:
- .long old_vbr
- .balign 256
+ .org 0x100
vbr_100:
- #ifdef PROFILE
+#ifdef PROFILE
! Note on register usage.
! we use r0..r3 as scratch in this code. If we are here due to a trapa for profiling
! then this is OK as we are just before executing any function code.
@@ -1017,50 +1005,7 @@ handler_100:
2:
.long old_vbr
- .balign 256
-vbr_200:
- mov.l 2f, r0 ! load the old vbr setting (if any)
- mov.l @r0, r0
- cmp/eq #0, r0
- bf 1f
- ! no previous vbr - jump to own generic handler
- bra handler
- nop
-1: ! there was a previous handler - chain them
- add #0x7f, r0 ! 0x7f
- add #0x7f, r0 ! 0xfe
- add #0x7f, r0 ! 0x17d
- add #0x7f, r0 ! 0x1fc
- add #0x4, r0 ! add 0x200 without corrupting another register
- jmp @r0
- nop
- .balign 4
-2:
- .long old_vbr
-
- .balign 256
-vbr_300:
- mov.l 2f, r0 ! load the old vbr setting (if any)
- mov.l @r0, r0
- cmp/eq #0, r0
- bf 1f
- ! no previous vbr - jump to own generic handler
- bra handler
- nop
-1: ! there was a previous handler - chain them
- rotcr r0
- rotcr r0
- add #0x7f, r0 ! 0x1fc
- add #0x41, r0 ! 0x300
- rotcl r0
- rotcl r0 ! Add 0x300 without corrupting another register
- jmp @r0
- nop
- .balign 4
-2:
- .long old_vbr
-
- .balign 256
+ .org 0x400
vbr_400: ! Should be at vbr+0x400
mov.l 2f, r0 ! load the old vbr setting (if any)
mov.l @r0, r0
@@ -1103,28 +1048,7 @@ handler:
jmp @r2
nop
- .balign 256
-vbr_500:
- mov.l 2f, r0 ! load the old vbr setting (if any)
- mov.l @r0, r0
- cmp/eq #0, r0
- ! no previous vbr - jump to own generic handler
- bt handler
- ! there was a previous handler - chain them
- rotcr r0
- rotcr r0
- add #0x7f, r0 ! 0x1fc
- add #0x7f, r0 ! 0x3f8
- add #0x42, r0 ! 0x500
- rotcl r0
- rotcl r0 ! Add 0x500 without corrupting another register
- jmp @r0
- nop
- .balign 4
-2:
- .long old_vbr
-
- .balign 256
+ .org 0x600
vbr_600:
#ifdef PROFILE
! Should be at vbr+0x600
@@ -1140,11 +1064,48 @@ vbr_600:
mov.l r6,@-r15
mov.l r7,@-r15
sts.l pr,@-r15
+ sts.l mach,@-r15
+ sts.l macl,@-r15
+#if defined(__SH_FPU_ANY__)
+ ! Save fpul and fpscr, save fr0-fr7 in 64 bit mode
+ ! and set the pervading precision for the timer_handler
+ mov #0,r0
+ sts.l fpul,@-r15
+ sts.l fpscr,@-r15
+ lds r0,fpscr ! Clear fpscr
+ fmov fr0,@-r15
+ fmov fr1,@-r15
+ fmov fr2,@-r15
+ fmov fr3,@-r15
+ mov.l pervading_precision_k,r0
+ fmov fr4,@-r15
+ fmov fr5,@-r15
+ mov.l @r0,r0
+ fmov fr6,@-r15
+ fmov fr7,@-r15
+ lds r0,fpscr
+#endif /* __SH_FPU_ANY__ */
! Pass interrupted pc to timer_handler as first parameter (r4).
stc spc, r4
mov.l timer_handler_k, r0
jsr @r0
nop
+#if defined(__SH_FPU_ANY__)
+ mov #0,r0
+ lds r0,fpscr ! Clear the fpscr
+ fmov @r15+,fr7
+ fmov @r15+,fr6
+ fmov @r15+,fr5
+ fmov @r15+,fr4
+ fmov @r15+,fr3
+ fmov @r15+,fr2
+ fmov @r15+,fr1
+ fmov @r15+,fr0
+ lds.l @r15+,fpscr
+ lds.l @r15+,fpul
+#endif /* __SH_FPU_ANY__ */
+ lds.l @r15+,macl
+ lds.l @r15+,mach
lds.l @r15+,pr
mov.l @r15+,r7
mov.l @r15+,r6
@@ -1157,6 +1118,13 @@ vbr_600:
stc sgr, r15 ! Restore r15, destroyed by this sequence.
rte
nop
+#if defined(__SH_FPU_ANY__)
+ .balign 4
+pervading_precision_k:
+#define CONCAT1(A,B) A##B
+#define CONCAT(A,B) CONCAT1(A,B)
+ .long CONCAT(__USER_LABEL_PREFIX__,__fpscr_values)+4
+#endif
#else
mov.l 2f, r0 ! Load the old vbr setting (if any).
mov.l @r0, r0
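The vbr_100/vbr_400/vbr_600 stubs that remain all chain to any pre-existing vector table the same way; the deleted vbr_200/vbr_300/vbr_500 variants only differed in the register juggling needed to add their offset to the old VBR base without a second scratch register. A rough C rendering of the common stub logic (illustrative only; old_vbr and generic_handler stand in for the assembly labels above):

    /* Sketch of what each remaining vector stub does; OFF is the stub's
       fixed offset (0x100, 0x400 or 0x600) that .org now guarantees.  */
    extern unsigned long old_vbr;
    extern void generic_handler (void);

    static void
    vector_stub (unsigned long off)
    {
      if (old_vbr == 0)
        generic_handler ();                       /* no previous VBR table */
      else
        ((void (*) (void)) (old_vbr + off)) ();   /* chain to the old handler */
    }

With .org placing each stub at its architectural offset in the new .text.vbr section, the deleted sequences that built 0x200, 0x300 and 0x500 out of small add/rotate steps are no longer needed.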
diff --git a/gcc/config/sh/divcost-analysis b/gcc/config/sh/divcost-analysis
index 541e31324b3..0296269bb52 100644
--- a/gcc/config/sh/divcost-analysis
+++ b/gcc/config/sh/divcost-analysis
@@ -38,12 +38,17 @@ div_r8_neg -> div_r8_neg_end: 18
div_le128_neg -> div_by_1_neg: 4
div_le128_neg -> rts 18
- absolute divisor range:
+ sh4-200 absolute divisor range:
                   1    [2..128]  [129..64K)  [64K..|dividend|/256]   >=64K,>|dividend/256|
udiv 18 22 38 32 30
sdiv pos: 20 24 41 35 32
sdiv neg: 15 25 42 36 33
+ sh4-300 absolute divisor range:
+ 8 bit 16 bit 24 bit > 24 bit
+udiv 15 35 28 25
+sdiv 14 36 34 31
+
fp-based:
diff --git a/gcc/config/sh/divtab-sh4-300.c b/gcc/config/sh/divtab-sh4-300.c
new file mode 100644
index 00000000000..448b0b8af8e
--- /dev/null
+++ b/gcc/config/sh/divtab-sh4-300.c
@@ -0,0 +1,81 @@
+/* Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* Calculate division table for ST40-300 integer division
+ Contributed by Joern Rennecke
+ joern.rennecke@st.com */
+
+#include <stdio.h>
+#include <math.h>
+
+int
+main ()
+{
+ int i, j;
+ double q, r, err, max_err = 0, max_s_err = 0;
+
+ puts("/* This table has been generated by divtab-sh4.c. */");
+ puts ("\t.balign 4");
+ for (i = -128; i < 128; i++)
+ {
+ int n = 0;
+ if (i == 0)
+ {
+ /* output some dummy number for 1/0. */
+ puts ("LOCAL(div_table_clz):\n\t.byte\t0");
+ continue;
+ }
+ for (j = i < 0 ? -i : i; j < 128; j += j)
+ n++;
+ printf ("\t.byte\t%d\n", n - 7);
+ }
+ puts("\
+/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,\n\
+ or in bit 33 for powers of two. */\n\
+ .balign 4");
+ for (i = -128; i < 128; i++)
+ {
+ if (i == 0)
+ {
+ puts ("LOCAL(div_table_inv):\n\t.long\t0x0");
+ continue;
+ }
+ j = i < 0 ? -i : i;
+ while (j < 64)
+ j += j;
+ q = 4.*(1<<30)*128/j;
+ r = ceil (q);
+ printf ("\t.long\t0x%X\n", (unsigned) r);
+ err = r - q;
+ if (err > max_err)
+ max_err = err;
+ err = err * j / 128;
+ if (err > max_s_err)
+ max_s_err = err;
+ }
+ printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err);
+  return 0;
+}
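For a concrete reading of the two tables this generator emits, take divisor 3: the first loop doubles 3 up to 96 in six steps, so the emitted clz byte is 6 - 7 = -1, and the normalized reciprocal is ceil(2^39/96) = 0x155555556, stored as 0x55555556 with the leading one left implicit in bit 32. A stand-alone check of that encoding (not part of the generator; the variable names are ad hoc):

    #include <assert.h>
    #include <stdio.h>

    int
    main (void)
    {
      int d = 3, n = 0, j;
      unsigned long long q;

      for (j = d; j < 128; j += j)  /* 3,6,12,24,48,96 -> n == 6 */
        n++;
      assert (n - 7 == -1);         /* the .byte emitted for divisor 3 */

      for (j = d; j < 64; j += j)
        ;                           /* normalize into [64,128): j == 96 */
      /* 2^39/96 is not exact, so ceil == floor + 1 here.  */
      q = (1ULL << 39) / j + 1;
      printf ("0x%llX -> .long 0x%X\n", q, (unsigned) q);
      return 0;
    }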
diff --git a/gcc/config/sh/embed-elf.h b/gcc/config/sh/embed-elf.h
index 4497cf34636..0d817cacf85 100644
--- a/gcc/config/sh/embed-elf.h
+++ b/gcc/config/sh/embed-elf.h
@@ -32,6 +32,7 @@ Boston, MA 02110-1301, USA. */
#define LIBGCC_SPEC "%{!shared: \
%{m4-100*:-lic_invalidate_array_4-100} \
%{m4-200*:-lic_invalidate_array_4-200} \
+  %{m4-300*|m4-340:-lic_invalidate_array_4a %{!Os: -lgcc-4-300}} \
%{m4a*:-lic_invalidate_array_4a}} \
%{Os: -lgcc-Os-4-200} \
-lgcc \
diff --git a/gcc/config/sh/lib1funcs-4-300.asm b/gcc/config/sh/lib1funcs-4-300.asm
new file mode 100644
index 00000000000..b07912425af
--- /dev/null
+++ b/gcc/config/sh/lib1funcs-4-300.asm
@@ -0,0 +1,938 @@
+/* Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* libgcc routines for the STMicroelectronics ST40-300 CPU.
+   Contributed by Joern Rennecke joern.rennecke@st.com.  */
+
+#include "lib1funcs.h"
+
+#ifdef L_div_table
+#if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code uses shld, thus is not suitable for SH1 / SH2.  */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4-300.
+ Uses a lookup table for divisors in the range -128 .. +127, and
+ div1 with case distinction for larger divisors in three more ranges.
+ The code is lumped together with the table to allow the use of mova. */
+#ifdef __LITTLE_ENDIAN__
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+ .global GLOBAL(udivsi3_i4i)
+ .global GLOBAL(sdivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+ FUNC(GLOBAL(sdivsi3_i4i))
+
+ .balign 4
+LOCAL(div_ge8m): ! 10 cycles up to here
+ rotcr r1 ! signed shift must use original sign from r4
+ div0s r5,r4
+ mov #24,r7
+ shld r7,r6
+ shad r0,r1
+ rotcl r6
+ div1 r5,r1
+ swap.w r5,r0 ! detect -0x80000000 : 0x800000
+ rotcl r6
+ swap.w r4,r7
+ div1 r5,r1
+ swap.b r7,r7
+ rotcl r6
+ or r7,r0
+ div1 r5,r1
+ swap.w r0,r7
+ rotcl r6
+ or r7,r0
+ div1 r5,r1
+ add #-0x80,r0
+ rotcl r6
+ extu.w r0,r0
+ div1 r5,r1
+ neg r0,r0
+ rotcl r6
+ swap.w r0,r0
+ div1 r5,r1
+ mov.l @r15+,r7
+ and r6,r0
+ rotcl r6
+ div1 r5,r1
+ shll2 r0
+ rotcl r6
+ exts.b r0,r0
+ div1 r5,r1
+ swap.w r0,r0
+ exts.w r0,r1
+ exts.b r6,r0
+ mov.l @r15+,r6
+ rotcl r0
+ rts
+ sub r1,r0
+ ! 31 cycles up to here
+
+ .balign 4
+LOCAL(udiv_ge64k): ! 3 cycles up to here
+ mov r4,r0
+ shlr8 r0
+ div0u
+ cmp/hi r0,r5
+ bt LOCAL(udiv_r8)
+ mov.l r5,@-r15
+ shll8 r5
+ ! 7 cycles up to here
+ .rept 8
+ div1 r5,r0
+ .endr
+ extu.b r4,r1 ! 15 cycles up to here
+ extu.b r0,r6
+ xor r1,r0
+ xor r6,r0
+ swap.b r6,r6
+ .rept 8
+ div1 r5,r0
+ .endr ! 25 cycles up to here
+ extu.b r0,r0
+ mov.l @r15+,r5
+ or r6,r0
+ mov.l @r15+,r6
+ rts
+ rotcl r0 ! 28 cycles up to here
+
+ .balign 4
+LOCAL(udiv_r8): ! 6 cycles up to here
+ mov.l r4,@-r15
+ shll16 r4
+ shll8 r4
+ !
+ shll r4
+ mov r0,r1
+ div1 r5,r1
+ mov r4,r0
+ rotcl r0
+ mov.l @r15+,r4
+ div1 r5,r1
+ ! 12 cycles up to here
+ .rept 6
+ rotcl r0; div1 r5,r1
+ .endr
+ mov.l @r15+,r6 ! 24 cycles up to here
+ rts
+ rotcl r0
+
+ .balign 4
+LOCAL(div_ge32k): ! 6 cycles up to here
+ mov.l r7,@-r15
+ swap.w r5,r6
+ exts.b r6,r7
+ exts.w r6,r6
+ cmp/eq r6,r7
+ extu.b r1,r6
+ bf/s LOCAL(div_ge8m)
+ cmp/hi r1,r4 ! copy sign bit of r4 into T
+ rotcr r1 ! signed shift must use original sign from r4
+ div0s r5,r4
+ shad r0,r1
+ shll8 r5
+ div1 r5,r1
+ mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
+ div1 r5,r1
+ shlr8 r7
+ div1 r5,r1
+ swap.w r4,r0
+ div1 r5,r1
+ swap.b r0,r0
+ div1 r5,r1
+ or r0,r7
+ div1 r5,r1
+ add #-80,r7
+ div1 r5,r1
+ swap.w r7,r0
+ div1 r5,r1
+ or r0,r7
+ extu.b r1,r0
+ xor r6,r1
+ xor r0,r1
+ exts.b r0,r0
+ div1 r5,r1
+ extu.w r7,r7
+ div1 r5,r1
+ neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
+ div1 r5,r1
+ and r0,r7
+ div1 r5,r1
+ swap.w r7,r7 ! 26 cycles up to here.
+ div1 r5,r1
+ shll8 r0
+ div1 r5,r1
+ exts.w r7,r7
+ div1 r5,r1
+ add r0,r0
+ div1 r5,r1
+ sub r7,r0
+ extu.b r1,r1
+ mov.l @r15+,r7
+ rotcl r1
+ mov.l @r15+,r6
+ add r1,r0
+ mov #-8,r1
+ rts
+ shad r1,r5 ! 34 cycles up to here
+
+ .balign 4
+GLOBAL(udivsi3_i4i):
+ mov.l r6,@-r15
+ extu.w r5,r6
+ cmp/eq r5,r6
+ mov #0x7f,r0
+ bf LOCAL(udiv_ge64k)
+ cmp/hi r0,r5
+ bf LOCAL(udiv_le128)
+ mov r4,r1
+ shlr8 r1
+ div0u
+ shlr r1
+ shll16 r6
+ div1 r6,r1
+ extu.b r4,r0 ! 7 cycles up to here
+ .rept 8
+ div1 r6,r1
+ .endr ! 15 cycles up to here
+ xor r1,r0 ! xor dividend with result lsb
+ .rept 6
+ div1 r6,r1
+ .endr
+ mov.l r7,@-r15 ! 21 cycles up to here
+ div1 r6,r1
+ extu.b r0,r7
+ div1 r6,r1
+ shll8 r7
+ extu.w r1,r0
+ xor r7,r1 ! replace lsb of result with lsb of dividend
+ div1 r6,r1
+ mov #0,r7
+ div1 r6,r1
+ !
+ div1 r6,r1
+ bra LOCAL(div_end)
+ div1 r6,r1 ! 28 cycles up to here
+
+ /* This is link-compatible with a GLOBAL(sdivsi3) call,
+ but we effectively clobber only r1, macl and mach */
+ /* Because negative quotients are calculated as one's complements,
+ -0x80000000 divided by the smallest positive number of a number
+ range (0x80, 0x8000, 0x800000) causes saturation in the one's
+ complement representation, and we have to suppress the
+ one's -> two's complement adjustment. Since positive numbers
+ don't get such an adjustment, it's OK to also compute one's -> two's
+ complement adjustment suppression for a dividend of 0. */
+ .balign 4
+GLOBAL(sdivsi3_i4i):
+ mov.l r6,@-r15
+ exts.b r5,r6
+ cmp/eq r5,r6
+ mov #-1,r1
+ bt/s LOCAL(div_le128)
+ cmp/pz r4
+ addc r4,r1
+ exts.w r5,r6
+ cmp/eq r5,r6
+ mov #-7,r0
+ bf/s LOCAL(div_ge32k)
+ cmp/hi r1,r4 ! copy sign bit of r4 into T
+ rotcr r1
+ shll16 r6 ! 7 cycles up to here
+ shad r0,r1
+ div0s r5,r4
+ div1 r6,r1
+ mov.l r7,@-r15
+ div1 r6,r1
+ mov r4,r0 ! re-compute adjusted dividend
+ div1 r6,r1
+ mov #-31,r7
+ div1 r6,r1
+ shad r7,r0
+ div1 r6,r1
+ add r4,r0 ! adjusted dividend
+ div1 r6,r1
+ mov.l r8,@-r15
+ div1 r6,r1
+ swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
+ div1 r6,r1
+ swap.b r8,r8
+ xor r1,r0 ! xor dividend with result lsb
+ div1 r6,r1
+ div1 r6,r1
+ or r5,r8
+ div1 r6,r1
+ add #-0x80,r8 ! r8 is 0 iff there is a match
+ div1 r6,r1
+ swap.w r8,r7 ! or upper 16 bits...
+ div1 r6,r1
+ or r7,r8 !...into lower 16 bits
+ div1 r6,r1
+ extu.w r8,r8
+ div1 r6,r1
+ extu.b r0,r7
+ div1 r6,r1
+ shll8 r7
+ exts.w r1,r0
+ xor r7,r1 ! replace lsb of result with lsb of dividend
+ div1 r6,r1
+ neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
+ div1 r6,r1
+ and r0,r8
+ div1 r6,r1
+ swap.w r8,r7
+ div1 r6,r1
+ mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
+LOCAL(div_end):
+ div1 r6,r1
+ shll8 r0
+ div1 r6,r1
+ exts.w r7,r7
+ div1 r6,r1
+ add r0,r0
+ div1 r6,r1
+ sub r7,r0
+ extu.b r1,r1
+ mov.l @r15+,r7
+ rotcl r1
+ mov.l @r15+,r6
+ rts
+ add r1,r0
+
+ .balign 4
+LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
+ mova LOCAL(div_table_inv),r0
+ shll2 r6
+ mov.l @(r0,r6),r1
+ mova LOCAL(div_table_clz),r0
+ lds r4,mach
+ !
+ !
+ !
+ tst r1,r1
+ !
+ bt 0f
+ dmulu.l r1,r4
+0: mov.b @(r0,r5),r1
+ clrt
+ !
+ !
+ sts mach,r0
+ addc r4,r0
+ rotcr r0
+ mov.l @r15+,r6
+ rts
+ shld r1,r0
+
+ .balign 4
+LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
+ mova LOCAL(div_table_inv),r0
+ shll2 r6
+ mov.l @(r0,r6),r1
+ mova LOCAL(div_table_clz),r0
+ neg r4,r6
+ bf 0f
+ mov r4,r6
+0: lds r6,mach
+ tst r1,r1
+ bt 0f
+ dmulu.l r1,r6
+0: div0s r4,r5
+ mov.b @(r0,r5),r1
+ bt/s LOCAL(le128_neg)
+ clrt
+ !
+ sts mach,r0
+ addc r6,r0
+ rotcr r0
+ mov.l @r15+,r6
+ rts
+ shld r1,r0
+
+/* Could trap divide by zero for the cost of one cycle more mispredict penalty:
+...
+ dmulu.l r1,r6
+0: div0s r4,r5
+ bt/s LOCAL(le128_neg)
+ tst r5,r5
+ bt LOCAL(div_by_zero)
+ mov.b @(r0,r5),r1
+ sts mach,r0
+ addc r6,r0
+...
+LOCAL(div_by_zero):
+ trapa #
+ .balign 4
+LOCAL(le128_neg):
+ bt LOCAL(div_by_zero)
+ mov.b @(r0,r5),r1
+ sts mach,r0
+ addc r6,r0
+... */
+
+ .balign 4
+LOCAL(le128_neg):
+ sts mach,r0
+ addc r6,r0
+ rotcr r0
+ mov.l @r15+,r6
+ shad r1,r0
+ rts
+ neg r0,r0
+ ENDFUNC(GLOBAL(udivsi3_i4i))
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+
+/* This table has been generated by divtab-sh4-300.c.  */
+ .balign 4
+ .byte -7
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -1
+ .byte -1
+ .byte 0
+LOCAL(div_table_clz):
+ .byte 0
+ .byte 0
+ .byte -1
+ .byte -1
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
+ or in bit 33 for powers of two. */
+ .balign 4
+ .long 0x0
+ .long 0x2040811
+ .long 0x4104105
+ .long 0x624DD30
+ .long 0x8421085
+ .long 0xA6810A7
+ .long 0xC9714FC
+ .long 0xECF56BF
+ .long 0x11111112
+ .long 0x135C8114
+ .long 0x15B1E5F8
+ .long 0x18118119
+ .long 0x1A7B9612
+ .long 0x1CF06ADB
+ .long 0x1F7047DD
+ .long 0x21FB7813
+ .long 0x24924925
+ .long 0x27350B89
+ .long 0x29E4129F
+ .long 0x2C9FB4D9
+ .long 0x2F684BDB
+ .long 0x323E34A3
+ .long 0x3521CFB3
+ .long 0x38138139
+ .long 0x3B13B13C
+ .long 0x3E22CBCF
+ .long 0x41414142
+ .long 0x446F8657
+ .long 0x47AE147B
+ .long 0x4AFD6A06
+ .long 0x4E5E0A73
+ .long 0x51D07EAF
+ .long 0x55555556
+ .long 0x58ED2309
+ .long 0x5C9882BA
+ .long 0x60581606
+ .long 0x642C8591
+ .long 0x68168169
+ .long 0x6C16C16D
+ .long 0x702E05C1
+ .long 0x745D1746
+ .long 0x78A4C818
+ .long 0x7D05F418
+ .long 0x81818182
+ .long 0x86186187
+ .long 0x8ACB90F7
+ .long 0x8F9C18FA
+ .long 0x948B0FCE
+ .long 0x9999999A
+ .long 0x9EC8E952
+ .long 0xA41A41A5
+ .long 0xA98EF607
+ .long 0xAF286BCB
+ .long 0xB4E81B4F
+ .long 0xBACF914D
+ .long 0xC0E07039
+ .long 0xC71C71C8
+ .long 0xCD856891
+ .long 0xD41D41D5
+ .long 0xDAE6076C
+ .long 0xE1E1E1E2
+ .long 0xE9131AC0
+ .long 0xF07C1F08
+ .long 0xF81F81F9
+ .long 0x0
+ .long 0x4104105
+ .long 0x8421085
+ .long 0xC9714FC
+ .long 0x11111112
+ .long 0x15B1E5F8
+ .long 0x1A7B9612
+ .long 0x1F7047DD
+ .long 0x24924925
+ .long 0x29E4129F
+ .long 0x2F684BDB
+ .long 0x3521CFB3
+ .long 0x3B13B13C
+ .long 0x41414142
+ .long 0x47AE147B
+ .long 0x4E5E0A73
+ .long 0x55555556
+ .long 0x5C9882BA
+ .long 0x642C8591
+ .long 0x6C16C16D
+ .long 0x745D1746
+ .long 0x7D05F418
+ .long 0x86186187
+ .long 0x8F9C18FA
+ .long 0x9999999A
+ .long 0xA41A41A5
+ .long 0xAF286BCB
+ .long 0xBACF914D
+ .long 0xC71C71C8
+ .long 0xD41D41D5
+ .long 0xE1E1E1E2
+ .long 0xF07C1F08
+ .long 0x0
+ .long 0x8421085
+ .long 0x11111112
+ .long 0x1A7B9612
+ .long 0x24924925
+ .long 0x2F684BDB
+ .long 0x3B13B13C
+ .long 0x47AE147B
+ .long 0x55555556
+ .long 0x642C8591
+ .long 0x745D1746
+ .long 0x86186187
+ .long 0x9999999A
+ .long 0xAF286BCB
+ .long 0xC71C71C8
+ .long 0xE1E1E1E2
+ .long 0x0
+ .long 0x11111112
+ .long 0x24924925
+ .long 0x3B13B13C
+ .long 0x55555556
+ .long 0x745D1746
+ .long 0x9999999A
+ .long 0xC71C71C8
+ .long 0x0
+ .long 0x24924925
+ .long 0x55555556
+ .long 0x9999999A
+ .long 0x0
+ .long 0x55555556
+ .long 0x0
+ .long 0x0
+LOCAL(div_table_inv):
+ .long 0x0
+ .long 0x0
+ .long 0x0
+ .long 0x55555556
+ .long 0x0
+ .long 0x9999999A
+ .long 0x55555556
+ .long 0x24924925
+ .long 0x0
+ .long 0xC71C71C8
+ .long 0x9999999A
+ .long 0x745D1746
+ .long 0x55555556
+ .long 0x3B13B13C
+ .long 0x24924925
+ .long 0x11111112
+ .long 0x0
+ .long 0xE1E1E1E2
+ .long 0xC71C71C8
+ .long 0xAF286BCB
+ .long 0x9999999A
+ .long 0x86186187
+ .long 0x745D1746
+ .long 0x642C8591
+ .long 0x55555556
+ .long 0x47AE147B
+ .long 0x3B13B13C
+ .long 0x2F684BDB
+ .long 0x24924925
+ .long 0x1A7B9612
+ .long 0x11111112
+ .long 0x8421085
+ .long 0x0
+ .long 0xF07C1F08
+ .long 0xE1E1E1E2
+ .long 0xD41D41D5
+ .long 0xC71C71C8
+ .long 0xBACF914D
+ .long 0xAF286BCB
+ .long 0xA41A41A5
+ .long 0x9999999A
+ .long 0x8F9C18FA
+ .long 0x86186187
+ .long 0x7D05F418
+ .long 0x745D1746
+ .long 0x6C16C16D
+ .long 0x642C8591
+ .long 0x5C9882BA
+ .long 0x55555556
+ .long 0x4E5E0A73
+ .long 0x47AE147B
+ .long 0x41414142
+ .long 0x3B13B13C
+ .long 0x3521CFB3
+ .long 0x2F684BDB
+ .long 0x29E4129F
+ .long 0x24924925
+ .long 0x1F7047DD
+ .long 0x1A7B9612
+ .long 0x15B1E5F8
+ .long 0x11111112
+ .long 0xC9714FC
+ .long 0x8421085
+ .long 0x4104105
+ .long 0x0
+ .long 0xF81F81F9
+ .long 0xF07C1F08
+ .long 0xE9131AC0
+ .long 0xE1E1E1E2
+ .long 0xDAE6076C
+ .long 0xD41D41D5
+ .long 0xCD856891
+ .long 0xC71C71C8
+ .long 0xC0E07039
+ .long 0xBACF914D
+ .long 0xB4E81B4F
+ .long 0xAF286BCB
+ .long 0xA98EF607
+ .long 0xA41A41A5
+ .long 0x9EC8E952
+ .long 0x9999999A
+ .long 0x948B0FCE
+ .long 0x8F9C18FA
+ .long 0x8ACB90F7
+ .long 0x86186187
+ .long 0x81818182
+ .long 0x7D05F418
+ .long 0x78A4C818
+ .long 0x745D1746
+ .long 0x702E05C1
+ .long 0x6C16C16D
+ .long 0x68168169
+ .long 0x642C8591
+ .long 0x60581606
+ .long 0x5C9882BA
+ .long 0x58ED2309
+ .long 0x55555556
+ .long 0x51D07EAF
+ .long 0x4E5E0A73
+ .long 0x4AFD6A06
+ .long 0x47AE147B
+ .long 0x446F8657
+ .long 0x41414142
+ .long 0x3E22CBCF
+ .long 0x3B13B13C
+ .long 0x38138139
+ .long 0x3521CFB3
+ .long 0x323E34A3
+ .long 0x2F684BDB
+ .long 0x2C9FB4D9
+ .long 0x29E4129F
+ .long 0x27350B89
+ .long 0x24924925
+ .long 0x21FB7813
+ .long 0x1F7047DD
+ .long 0x1CF06ADB
+ .long 0x1A7B9612
+ .long 0x18118119
+ .long 0x15B1E5F8
+ .long 0x135C8114
+ .long 0x11111112
+ .long 0xECF56BF
+ .long 0xC9714FC
+ .long 0xA6810A7
+ .long 0x8421085
+ .long 0x624DD30
+ .long 0x4104105
+ .long 0x2040811
+ /* maximum error: 0.987342 scaled: 0.921875*/
+
+#endif /* SH3 / SH4 */
+
+#endif /* L_div_table */
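The fast path for divisors in [1,127] (LOCAL(udiv_le128) above) is a multiply-by-reciprocal: the high word of inv * dividend is recombined with the dividend itself (the reciprocal's implicit leading one), halved through the carry by rotcr, and shifted right by the div_table_clz byte via shld with a negative count. A hypothetical C model of that sequence, assuming inv[] and shift[] mirror the table entries above (powers of two store inv == 0 and skip the multiply, exactly as the tst/bt pair does; mach was preloaded with the dividend for that case):

    #include <stdint.h>

    /* Model of LOCAL(udiv_le128); inv[d] / shift[d] stand for the
       div_table_inv and div_table_clz entries for 1 <= d <= 127.  */
    extern const uint32_t inv[128];
    extern const int8_t shift[128];

    static uint32_t
    udiv_le128 (uint32_t n, uint32_t d)
    {
      /* dmulu.l: high 32 bits of inv * n; when inv == 0 the multiply
         is skipped and mach still holds the dividend.  */
      uint64_t hi = inv[d] ? ((uint64_t) inv[d] * n) >> 32 : n;
      /* addc + rotcr: fold in the implicit leading one of the
         reciprocal and halve the 33-bit sum through the carry bit.  */
      uint32_t q = (uint32_t) ((hi + n) >> 1);
      /* shld with a negative count is a logical right shift.  */
      return q >> -shift[d];
    }

For example, n = 100, d = 3 gives hi = 33, q = 133 >> 1 = 66, and 66 >> 1 = 33. The signed entry point additionally applies the one's-complement adjustment described in the sdivsi3_i4i comment and is not modeled here.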
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index a0661545b56..e142b1cee68 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -1,6 +1,6 @@
/* Definitions of target machine for GNU compiler for Renesas / SuperH SH.
Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2003,
- 2004, 2005
+ 2004, 2005, 2006
Free Software Foundation, Inc.
Contributed by Steve Chamberlain (sac@cygnus.com).
Improved by Jim Wilson (wilson@cygnus.com).
@@ -69,6 +69,10 @@ extern void print_operand (FILE *, rtx, int);
extern void output_pic_addr_const (FILE *, rtx);
extern int expand_block_move (rtx *);
extern int prepare_move_operands (rtx[], enum machine_mode mode);
+extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
+ enum rtx_code comparison);
+extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);
+extern bool expand_cbranchdi4 (rtx *operands, enum rtx_code comparison);
extern void from_compare (rtx *, int);
extern int shift_insns_rtx (rtx);
extern void gen_ashift (int, int, rtx);
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 30b87480412..9f733b852f1 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -526,10 +526,15 @@ sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
case OPT_m4:
case OPT_m4_100:
case OPT_m4_200:
+ case OPT_m4_300:
target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
return true;
case OPT_m4_nofpu:
+ case OPT_m4_100_nofpu:
+ case OPT_m4_200_nofpu:
+ case OPT_m4_300_nofpu:
+ case OPT_m4_340:
case OPT_m4_400:
case OPT_m4_500:
target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
@@ -538,12 +543,14 @@ sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
case OPT_m4_single:
case OPT_m4_100_single:
case OPT_m4_200_single:
+ case OPT_m4_300_single:
target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
return true;
case OPT_m4_single_only:
case OPT_m4_100_single_only:
case OPT_m4_200_single_only:
+ case OPT_m4_300_single_only:
target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
return true;
@@ -1341,6 +1348,288 @@ prepare_move_operands (rtx operands[], enum machine_mode mode)
return 0;
}
+enum rtx_code
+prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
+ enum rtx_code comparison)
+{
+ rtx op1;
+ rtx scratch = NULL_RTX;
+
+ if (comparison == CODE_FOR_nothing)
+ comparison = GET_CODE (operands[0]);
+ else
+ scratch = operands[4];
+ if (GET_CODE (operands[1]) == CONST_INT
+ && GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx tmp = operands[1];
+
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ comparison = swap_condition (comparison);
+ }
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ if ((val == -1 || val == -0x81)
+ && (comparison == GT || comparison == LE))
+ {
+ comparison = (comparison == GT) ? GE : LT;
+ operands[2] = gen_int_mode (val + 1, mode);
+ }
+ else if ((val == 1 || val == 0x80)
+ && (comparison == GE || comparison == LT))
+ {
+ comparison = (comparison == GE) ? GT : LE;
+ operands[2] = gen_int_mode (val - 1, mode);
+ }
+ else if (val == 1 && (comparison == GEU || comparison == LTU))
+ {
+ comparison = (comparison == GEU) ? NE : EQ;
+ operands[2] = CONST0_RTX (mode);
+ }
+ else if (val == 0x80 && (comparison == GEU || comparison == LTU))
+ {
+ comparison = (comparison == GEU) ? GTU : LEU;
+ operands[2] = gen_int_mode (val - 1, mode);
+ }
+ else if (val == 0 && (comparison == GTU || comparison == LEU))
+ comparison = (comparison == GTU) ? NE : EQ;
+ else if (mode == SImode
+ && ((val == 0x7fffffff
+ && (comparison == GTU || comparison == LEU))
+ || ((unsigned HOST_WIDE_INT) val
+ == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
+ && (comparison == GEU || comparison == LTU))))
+ {
+ comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
+ operands[2] = CONST0_RTX (mode);
+ }
+ }
+ op1 = operands[1];
+ if (!no_new_pseudos)
+ operands[1] = force_reg (mode, op1);
+ /* When we are handling DImode comparisons, we want to keep constants so
+ that we can optimize the component comparisons; however, memory loads
+ are better issued as a whole so that they can be scheduled well.
+ SImode equality comparisons allow I08 constants, but only when they
+ compare r0. Hence, if operands[1] has to be loaded from somewhere else
+ into a register, that register might as well be r0, and we allow the
+ constant. If it is already in a register, this is likely to be
+     allocated to a different hard register, thus we load the constant into
+ a register unless it is zero. */
+ if (!REG_P (operands[2])
+ && (GET_CODE (operands[2]) != CONST_INT
+ || (mode == SImode && operands[2] != CONST0_RTX (SImode)
+ && ((comparison != EQ && comparison != NE)
+ || (REG_P (op1) && REGNO (op1) != R0_REG)
+ || !CONST_OK_FOR_I08 (INTVAL (operands[2]))))))
+ {
+ if (scratch && GET_MODE (scratch) == mode)
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ else if (!no_new_pseudos)
+ operands[2] = force_reg (mode, operands[2]);
+ }
+ return comparison;
+}
+
+void
+expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
+{
+ rtx (*branch_expander) (rtx) = gen_branch_true;
+ rtx jump;
+
+ comparison = prepare_cbranch_operands (operands, SImode, comparison);
+ switch (comparison)
+ {
+ case NE: case LT: case LE: case LTU: case LEU:
+ comparison = reverse_condition (comparison);
+ branch_expander = gen_branch_false;
+ default: ;
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
+ gen_rtx_fmt_ee (comparison, SImode,
+ operands[1], operands[2])));
+ jump = emit_jump_insn (branch_expander (operands[3]));
+ if (probability >= 0)
+ REG_NOTES (jump)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability),
+ REG_NOTES (jump));
+
+}
+
+/* ??? How should we distribute probabilities when more than one branch
+   is generated?  So far we only have some ad-hoc observations:
+ - If the operands are random, they are likely to differ in both parts.
+ - If comparing items in a hash chain, the operands are random or equal;
+ operation should be EQ or NE.
+ - If items are searched in an ordered tree from the root, we can expect
+ the highpart to be unequal about half of the time; operation should be
+ an unequality comparison, operands non-constant, and overall probability
+ about 50%. Likewise for quicksort.
+ - Range checks will be often made against constants. Even if we assume for
+ simplicity an even distribution of the non-constant operand over a
+ sub-range here, the same probability could be generated with differently
+ wide sub-ranges - as long as the ratio of the part of the subrange that
+ is before the threshold to the part that comes after the threshold stays
+ the same. Thus, we can't really tell anything here;
+ assuming random distribution is at least simple.
+ */
+
+bool
+expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
+{
+ enum rtx_code msw_taken, msw_skip, lsw_taken;
+ rtx skip_label;
+ rtx op1h, op1l, op2h, op2l;
+ int num_branches;
+ int prob, rev_prob;
+ int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
+
+ comparison = prepare_cbranch_operands (operands, DImode, comparison);
+ op1h = gen_highpart_mode (SImode, DImode, operands[1]);
+ op2h = gen_highpart_mode (SImode, DImode, operands[2]);
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing;
+ prob = split_branch_probability;
+ rev_prob = REG_BR_PROB_BASE - prob;
+ switch (comparison)
+ {
+ /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
+ That costs 1 cycle more when the first branch can be predicted taken,
+ but saves us mispredicts because only one branch needs prediction.
+ It also enables generating the cmpeqdi_t-1 pattern. */
+ case EQ:
+ if (TARGET_CMPEQDI_T)
+ {
+ emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
+ emit_jump_insn (gen_branch_true (operands[3]));
+ return true;
+ }
+ msw_skip = NE;
+ lsw_taken = EQ;
+ if (prob >= 0)
+ {
+	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
+ msw_skip_prob = rev_prob;
+ if (REG_BR_PROB_BASE <= 65535)
+ lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
+ else
+ {
+ gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
+ lsw_taken_prob
+ = (prob
+ ? (REG_BR_PROB_BASE
+ - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
+ / ((HOST_WIDEST_INT) prob << 32)))
+ : 0);
+ }
+ }
+ break;
+ case NE:
+ if (TARGET_CMPEQDI_T)
+ {
+ emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
+ emit_jump_insn (gen_branch_false (operands[3]));
+ return true;
+ }
+ msw_taken = NE;
+      msw_taken_prob = prob;
+ lsw_taken = NE;
+ lsw_taken_prob = 0;
+ break;
+ case GTU: case GT:
+ msw_taken = comparison;
+ if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
+ break;
+ if (comparison != GTU || op2h != CONST0_RTX (SImode))
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = GTU;
+ break;
+ case GEU: case GE:
+ if (op2l == CONST0_RTX (SImode))
+ msw_taken = comparison;
+ else
+ {
+ msw_taken = comparison == GE ? GT : GTU;
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = GEU;
+ }
+ break;
+ case LTU: case LT:
+ msw_taken = comparison;
+ if (op2l == CONST0_RTX (SImode))
+ break;
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = LTU;
+ break;
+ case LEU: case LE:
+ if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1)
+ msw_taken = comparison;
+ else
+ {
+ lsw_taken = LEU;
+ if (comparison == LE)
+ msw_taken = LT;
+ else if (op2h != CONST0_RTX (SImode))
+ msw_taken = LTU;
+ else
+ break;
+ msw_skip = swap_condition (msw_taken);
+ }
+ break;
+ default: return false;
+ }
+ num_branches = ((msw_taken != CODE_FOR_nothing)
+ + (msw_skip != CODE_FOR_nothing)
+ + (lsw_taken != CODE_FOR_nothing));
+ if (comparison != EQ && comparison != NE && num_branches > 1)
+ {
+ if (!CONSTANT_P (operands[2])
+ && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
+ && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
+ {
+ msw_taken_prob = prob / 2U;
+ msw_skip_prob
+ = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
+ lsw_taken_prob = prob;
+ }
+ else
+ {
+ msw_taken_prob = prob;
+ msw_skip_prob = REG_BR_PROB_BASE;
+ /* ??? If we have a constant op2h, should we use that when
+ calculating lsw_taken_prob? */
+ lsw_taken_prob = prob;
+ }
+ }
+ operands[1] = op1h;
+ operands[2] = op2h;
+ operands[4] = NULL_RTX;
+ if (msw_taken != CODE_FOR_nothing)
+ expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
+ if (msw_skip != CODE_FOR_nothing)
+ {
+ rtx taken_label = operands[3];
+
+ operands[3] = skip_label = gen_label_rtx ();
+ expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
+ operands[3] = taken_label;
+ }
+ operands[1] = op1l;
+ operands[2] = op2l;
+ if (lsw_taken != CODE_FOR_nothing)
+ expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
+ if (msw_skip != CODE_FOR_nothing)
+ emit_label (skip_label);
+ return true;
+}
+
/* Prepare the operands for an scc instruction; make sure that the
compare has been done. */
rtx
@@ -1723,6 +2012,12 @@ output_branch (int logic, rtx insn, rtx *operands)
}
}
+/* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before,
+   fill in operand 9 as a label to the successor insn.
+   We try to use jump threading where possible.
+   If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
+ we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
+ follow jmp and bt, if the address is in range. */
const char *
output_branchy_insn (enum rtx_code code, const char *template,
rtx insn, rtx *operands)
@@ -2117,6 +2412,15 @@ sh_rtx_costs (rtx x, int code, int outer_code, int *total)
else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
&& CONST_OK_FOR_K08 (INTVAL (x)))
*total = 1;
+      /* prepare_cmp_insn will force costly constants into registers before
+	 the cbranch[sd]i4 patterns can see them, so preserve potentially
+ interesting ones not covered by I08 above. */
+ else if (outer_code == COMPARE
+ && ((unsigned HOST_WIDE_INT) INTVAL (x)
+ == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
+ || INTVAL (x) == 0x7fffffff
+ || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
+ *total = 1;
else
*total = 8;
return true;
@@ -2135,6 +2439,11 @@ sh_rtx_costs (rtx x, int code, int outer_code, int *total)
case CONST_DOUBLE:
if (TARGET_SHMEDIA)
*total = COSTS_N_INSNS (4);
+      /* prepare_cmp_insn will force costly constants into registers before
+	 the cbranchdi4 pattern can see them, so preserve potentially
+ interesting ones. */
+ else if (outer_code == COMPARE && GET_MODE (x) == DImode)
+ *total = 1;
else
*total = 10;
return true;
@@ -8571,23 +8880,32 @@ sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
}
else if (REG_NOTE_KIND (link) == 0)
{
- enum attr_type dep_type, type;
+ enum attr_type type;
+ rtx dep_set;
if (recog_memoized (insn) < 0
|| recog_memoized (dep_insn) < 0)
return cost;
- dep_type = get_attr_type (dep_insn);
- if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
- cost--;
- if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
- && (type = get_attr_type (insn)) != TYPE_CALL
- && type != TYPE_SFUNC)
- cost--;
+ dep_set = single_set (dep_insn);
+ /* The latency that we specify in the scheduling description refers
+ to the actual output, not to an auto-increment register; for that,
+ the latency is one. */
+ if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
+ {
+ rtx set = single_set (insn);
+
+ if (set
+ && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
+ && (!MEM_P (SET_DEST (set))
+ || !reg_mentioned_p (SET_DEST (dep_set),
+ XEXP (SET_DEST (set), 0))))
+ cost = 1;
+ }
/* The only input for a call that is timing-critical is the
function's address. */
- if (GET_CODE(insn) == CALL_INSN)
+ if (GET_CODE (insn) == CALL_INSN)
{
rtx call = PATTERN (insn);
@@ -8599,12 +8917,16 @@ sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
/* sibcalli_thunk uses a symbol_ref in an unspec. */
&& (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
|| ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
- cost = 0;
+ cost -= TARGET_SH4_300 ? 3 : 6;
}
/* Likewise, the most timing critical input for an sfuncs call
is the function address. However, sfuncs typically start
using their arguments pretty quickly.
- Assume a four cycle delay before they are needed. */
+ Assume a four cycle delay for SH4 before they are needed.
+ Cached ST40-300 calls are quicker, so assume only a one
+ cycle delay there.
+ ??? Maybe we should encode the delays till input registers
+ are needed by sfuncs into the sfunc call insn. */
/* All sfunc calls are parallels with at least four components.
Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
else if (GET_CODE (PATTERN (insn)) == PARALLEL
@@ -8612,50 +8934,83 @@ sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
&& (reg = sfunc_uses_reg (insn)))
{
if (! reg_set_p (reg, dep_insn))
- cost -= 4;
- }
- /* When the preceding instruction loads the shift amount of
- the following SHAD/SHLD, the latency of the load is increased
- by 1 cycle. */
- else if (TARGET_SH4
- && get_attr_type (insn) == TYPE_DYN_SHIFT
- && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
- && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
- XEXP (SET_SRC (single_set (insn)),
- 1)))
- cost++;
- /* When an LS group instruction with a latency of less than
- 3 cycles is followed by a double-precision floating-point
- instruction, FIPR, or FTRV, the latency of the first
- instruction is increased to 3 cycles. */
- else if (cost < 3
- && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
- && get_attr_dfp_comp (insn) == DFP_COMP_YES)
- cost = 3;
- /* The lsw register of a double-precision computation is ready one
- cycle earlier. */
- else if (reload_completed
- && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
- && (use_pat = single_set (insn))
- && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
- SET_SRC (use_pat)))
- cost -= 1;
-
- if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
- && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
- cost -= 1;
+ cost -= TARGET_SH4_300 ? 1 : 4;
+ }
+ if (TARGET_HARD_SH4 && !TARGET_SH4_300)
+ {
+ enum attr_type dep_type = get_attr_type (dep_insn);
+
+ if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
+ cost--;
+ else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
+ && (type = get_attr_type (insn)) != TYPE_CALL
+ && type != TYPE_SFUNC)
+ cost--;
+ /* When the preceding instruction loads the shift amount of
+ the following SHAD/SHLD, the latency of the load is increased
+ by 1 cycle. */
+ if (get_attr_type (insn) == TYPE_DYN_SHIFT
+ && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
+ && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
+ XEXP (SET_SRC (single_set (insn)),
+ 1)))
+ cost++;
+ /* When an LS group instruction with a latency of less than
+ 3 cycles is followed by a double-precision floating-point
+ instruction, FIPR, or FTRV, the latency of the first
+ instruction is increased to 3 cycles. */
+ else if (cost < 3
+ && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
+ && get_attr_dfp_comp (insn) == DFP_COMP_YES)
+ cost = 3;
+ /* The lsw register of a double-precision computation is ready one
+ cycle earlier. */
+ else if (reload_completed
+ && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
+ && (use_pat = single_set (insn))
+ && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
+ SET_SRC (use_pat)))
+ cost -= 1;
+
+ if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
+ && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
+ cost -= 1;
+ }
+ else if (TARGET_SH4_300)
+ {
+ /* Stores need their input register two cycles later. */
+ if (dep_set && cost >= 1
+ && ((type = get_attr_type (insn)) == TYPE_STORE
+ || type == TYPE_PSTORE
+ || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
+ {
+ rtx set = single_set (insn);
+
+ if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
+ && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
+ {
+ cost -= 2;
+ /* But don't reduce the cost below 1 if the address depends
+ on a side effect of dep_insn. */
+ if (cost < 1
+ && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
+ cost = 1;
+ }
+ }
+ }
}
/* An anti-dependence penalty of two applies if the first insn is a double
precision fadd / fsub / fmul. */
- else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
+ else if (!TARGET_SH4_300
+ && REG_NOTE_KIND (link) == REG_DEP_ANTI
&& recog_memoized (dep_insn) >= 0
- && get_attr_type (dep_insn) == TYPE_DFP_ARITH
+ && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
+ || get_attr_type (dep_insn) == TYPE_DFP_MUL)
/* A lot of alleged anti-flow dependences are fake,
so check this one is real. */
&& flow_dependent_p (dep_insn, insn))
cost = 2;
-
return cost;
}
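To make the control flow of expand_cbranchdi4 above concrete: for an unsigned 64-bit a > b against a non-constant rhs, it emits the msw_taken / msw_skip / lsw_taken triple so that at most one branch is taken on any input, which is what the sh4-300 branch-prediction comment in sh.md below is about. A hand-written illustration (not compiler output; the labels are ad hoc):

    #include <stdint.h>

    /* How an unsigned DImode "a > b" branch decomposes; a = ah:al, b = bh:bl.  */
    static int
    branch_gtu_di (uint32_t ah, uint32_t al, uint32_t bh, uint32_t bl)
    {
      if (ah > bh) goto taken;   /* msw_taken = GTU on the high words */
      if (ah < bh) goto skip;    /* msw_skip = swap_condition (GTU) = LTU */
      if (al > bl) goto taken;   /* lsw_taken: low words always unsigned */
     skip:
      return 0;
     taken:
      return 1;
    }

For signed GT the high-word tests use GT/LT while the low-word test stays GTU, matching the case GTU/GT arm of the switch.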
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index fc4e1f282a4..1b659c75135 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -274,6 +274,7 @@ do { \
#endif
#if SUPPORT_SH2
#define SUPPORT_SH3 1
+#define SUPPORT_SH2A_NOFPU 1
#endif
#if SUPPORT_SH3
#define SUPPORT_SH4_NOFPU 1
@@ -281,16 +282,17 @@ do { \
#if SUPPORT_SH4_NOFPU
#define SUPPORT_SH4A_NOFPU 1
#define SUPPORT_SH4AL 1
-#define SUPPORT_SH2A_NOFPU 1
#endif
#if SUPPORT_SH2E
#define SUPPORT_SH3E 1
+#define SUPPORT_SH2A_SINGLE_ONLY 1
#endif
#if SUPPORT_SH3E
#define SUPPORT_SH4_SINGLE_ONLY 1
+#endif
+#if SUPPORT_SH4_SINGLE_ONLY
#define SUPPORT_SH4A_SINGLE_ONLY 1
-#define SUPPORT_SH2A_SINGLE_ONLY 1
#endif
#if SUPPORT_SH4
@@ -469,6 +471,11 @@ do { \
target_flags |= MASK_SMALLCODE; \
sh_div_str = SH_DIV_STR_FOR_SIZE ; \
} \
+ else \
+ { \
+ TARGET_CBRANCHDI4 = 1; \
+ TARGET_EXPAND_CBRANCHDI4 = 1; \
+ } \
/* We can't meaningfully test TARGET_SHMEDIA here, because -m options \
haven't been parsed yet, hence we'd read only the default. \
sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so \
@@ -608,6 +615,7 @@ do { \
else \
sh_div_strategy = SH_DIV_INV; \
} \
+ TARGET_CBRANCHDI4 = 0; \
} \
/* -fprofile-arcs needs a working libgcov . In unified tree \
configurations with newlib, this requires to configure with \
@@ -668,6 +676,9 @@ do { \
sh_divsi3_libfunc = "__sdivsi3_1"; \
else \
sh_divsi3_libfunc = "__sdivsi3"; \
+ if (sh_branch_cost == -1) \
+ sh_branch_cost \
+ = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1; \
if (TARGET_FMOVD) \
reg_class_from_letter['e' - 'a'] = NO_REGS; \
\
@@ -844,7 +855,7 @@ do { \
((GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_INT \
|| GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_FLOAT) \
? (unsigned) MIN (BIGGEST_ALIGNMENT, GET_MODE_BITSIZE (TYPE_MODE (TYPE))) \
- : (unsigned) ALIGN)
+ : (unsigned) DATA_ALIGNMENT(TYPE, ALIGN))
/* Make arrays of chars word-aligned for the same reasons. */
#define DATA_ALIGNMENT(TYPE, ALIGN) \
@@ -2288,6 +2299,7 @@ struct sh_args {
#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF)
/* Nonzero if the constant value X is a legitimate general operand. */
+/* can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
#define LEGITIMATE_CONSTANT_P(X) \
(TARGET_SHMEDIA \
@@ -2298,7 +2310,7 @@ struct sh_args {
|| TARGET_SHMEDIA64) \
: (GET_CODE (X) != CONST_DOUBLE \
|| GET_MODE (X) == DFmode || GET_MODE (X) == SFmode \
- || (TARGET_SH2E && (fp_zero_operand (X) || fp_one_operand (X)))))
+ || GET_MODE (X) == DImode || GET_MODE (X) == VOIDmode))
/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
and check its validity for a certain class.
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index d091dfe0eff..a37c58308e3 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -204,7 +204,9 @@
;; load_si Likewise, SImode variant for general register.
;; fload Likewise, but load to fp register.
;; store to memory
+;; fstore floating point register to memory
;; move general purpose register to register
+;; movi8 8 bit immediate to general purpose register
;; mt_group other sh4 mt instructions
;; fmove register to register, floating point
;; smpy word precision integer multiply
@@ -221,11 +223,15 @@
;; sfunc special function call with known used registers
;; call function call
;; fp floating point
+;; fpscr_toggle toggle a bit in the fpscr
;; fdiv floating point divide (or square root)
;; gp_fpul move from general purpose register to fpul
;; fpul_gp move from fpul to general purpose register
;; mac_gp move from mac[lh] to general purpose register
-;; dfp_arith, dfp_cmp,dfp_conv
+;; gp_mac move from general purpose register to mac[lh]
+;; mac_mem move from mac[lh] to memory
+;; mem_mac move from memory to mac[lh]
+;; dfp_arith,dfp_mul,fp_cmp,dfp_cmp,dfp_conv
;; ftrc_s fix_truncsfsi2_i4
;; dfdiv double precision floating point divide (or square root)
;; cwb ic_invalidate_line_i
@@ -263,7 +269,7 @@
;; nil no-op move, will be deleted.
(define_attr "type"
- "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
+ "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,fstore,move,movi8,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fpscr_toggle,fdiv,ftrc_s,dfp_arith,dfp_mul,fp_cmp,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,gp_mac,mac_mem,mem_mac,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
(const_string "other"))
;; We define a new attribute namely "insn_class".  We use
@@ -279,12 +285,12 @@
(define_attr "insn_class"
"mt_group,ex_group,ls_group,br_group,fe_group,co_group,none"
(cond [(eq_attr "type" "move,mt_group") (const_string "mt_group")
- (eq_attr "type" "arith,dyn_shift") (const_string "ex_group")
- (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload,store,gp_fpul,fpul_gp") (const_string "ls_group")
+ (eq_attr "type" "movi8,arith,dyn_shift") (const_string "ex_group")
+ (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload,store,fstore,gp_fpul,fpul_gp") (const_string "ls_group")
(eq_attr "type" "cbranch,jump") (const_string "br_group")
- (eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_conv,dfdiv")
+ (eq_attr "type" "fp,fp_cmp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv")
(const_string "fe_group")
- (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore,prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb") (const_string "co_group")]
+ (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore,prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb,gp_mac,mac_mem,mem_mac") (const_string "co_group")]
(const_string "none")))
;; nil are zero instructions, and arith3 / arith3b are multiple instructions,
;; so these do not belong in an insn group, although they are modeled
@@ -494,14 +500,14 @@
;; SH4 Double-precision computation with double-precision result -
;; the two halves are ready at different times.
(define_attr "dfp_comp" "yes,no"
- (cond [(eq_attr "type" "dfp_arith,dfp_conv,dfdiv") (const_string "yes")]
+ (cond [(eq_attr "type" "dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "yes")]
(const_string "no")))
;; Insns for which the latency of a preceding fp insn is decreased by one.
(define_attr "late_fp_use" "yes,no" (const_string "no"))
;; And feeding insns for which this relevant.
(define_attr "any_fp_comp" "yes,no"
- (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_conv,dfdiv")
+ (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv")
(const_string "yes")]
(const_string "no")))
@@ -609,15 +615,37 @@
[(set_attr "type" "mt_group")])
;; -------------------------------------------------------------------------
+;; SImode compare and branch
+;; -------------------------------------------------------------------------
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_CBRANCHDI4"
+ "expand_cbranchsi4 (operands, CODE_FOR_nothing, -1); DONE;")
+
+;; -------------------------------------------------------------------------
;; SImode unsigned integer comparisons
;; -------------------------------------------------------------------------
-(define_insn "cmpgeusi_t"
+(define_insn_and_split "cmpgeusi_t"
[(set (reg:SI T_REG)
(geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
- (match_operand:SI 1 "arith_reg_operand" "r")))]
+ (match_operand:SI 1 "arith_reg_or_0_operand" "rN")))]
"TARGET_SH1"
"cmp/hs %1,%0"
+ "&& operands[0] == CONST0_RTX (SImode)"
+ [(pc)]
+ "
+{
+ emit_insn (gen_sett ());
+ DONE;
+}"
[(set_attr "type" "mt_group")])
(define_insn "cmpgtusi_t"
@@ -647,12 +675,64 @@
}")
;; -------------------------------------------------------------------------
-;; DImode signed integer comparisons
+;; DImode compare and branch
;; -------------------------------------------------------------------------
-;; ??? Could get better scheduling by splitting the initial test from the
-;; rest of the insn after reload. However, the gain would hardly justify
-;; the sh.md size increase necessary to do that.
+
+;; arith3 patterns don't work well with the sh4-300 branch prediction mechanism.
+;; Therefore, we aim to have a set of three branches that go straight to the
+;; destination, i.e. only one of them is taken at any one time.
+;; This mechanism should also be slightly better for the sh4-200.
+
+(define_expand "cbranchdi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:DI 1 "arith_operand" "")
+ (match_operand:DI 2 "arith_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_dup 4))
+ (clobber (reg:SI T_REG))]
+ "TARGET_CBRANCHDI4"
+ "
+{
+ enum rtx_code comparison;
+
+ if (TARGET_EXPAND_CBRANCHDI4)
+ {
+ if (expand_cbranchdi4 (operands, CODE_FOR_nothing))
+ DONE;
+ }
+ comparison = prepare_cbranch_operands (operands, DImode, CODE_FOR_nothing);
+ if (comparison != GET_CODE (operands[0]))
+ operands[0]
+      = gen_rtx_fmt_ee (comparison, VOIDmode, operands[1], operands[2]);
+ operands[4] = gen_rtx_SCRATCH (SImode);
+}")
+
+(define_insn_and_split "cbranchdi4_i"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:DI 1 "arith_operand" "r,r")
+ (match_operand:DI 2 "arith_operand" "rN,i")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_scratch:SI 4 "=X,&r"))
+ (clobber (reg:SI T_REG))]
+ "TARGET_CBRANCHDI4"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+ "
+{
+ if (!expand_cbranchdi4 (operands, GET_CODE (operands[0])))
+ FAIL;
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; DImode signed integer comparisons
+;; -------------------------------------------------------------------------
(define_insn ""
[(set (reg:SI T_REG)
@@ -4736,7 +4816,7 @@ label:
[(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) (reg:SF FPUL_REG))]
"TARGET_SH2E && ! TARGET_SH5"
"sts.l fpul,@-r15"
- [(set_attr "type" "store")
+ [(set_attr "type" "fstore")
(set_attr "late_fp_use" "yes")
(set_attr "hit_stack" "yes")])
@@ -4818,9 +4898,9 @@ label:
;; (made from (set (subreg:SI (reg:QI ###) 0) ) into T.
(define_insn "movsi_i"
[(set (match_operand:SI 0 "general_movdst_operand"
- "=r,r,t,r,r,r,r,m,<,<,x,l,x,l,r")
+ "=r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,r")
(match_operand:SI 1 "general_movsrc_operand"
- "Q,rI08,r,mr,x,l,t,r,x,l,r,r,>,>,i"))]
+ "Q,r,I08,r,mr,x,l,t,r,x,l,r,r,>,>,i"))]
"TARGET_SH1
&& ! TARGET_SH2E
&& ! TARGET_SH2A
@@ -4829,6 +4909,7 @@ label:
"@
mov.l %1,%0
mov %1,%0
+ mov %1,%0
cmp/pl %1
mov.l %1,%0
sts %1,%0
@@ -4842,8 +4923,8 @@ label:
lds.l %1,%0
lds.l %1,%0
fake %1,%0"
- [(set_attr "type" "pcload_si,move,mt_group,load_si,mac_gp,prget,move,store,store,pstore,move,prset,load,pload,pcload_si")
- (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+ [(set_attr "type" "pcload_si,move,movi8,mt_group,load_si,mac_gp,prget,arith,mac_mem,store,pstore,gp_mac,prset,mem_mac,pload,pcload_si")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
;; t/r must come after r/r, lest reload will try to reload stuff like
;; (subreg:SI (reg:SF FR14_REG) 0) into T (compiling stdlib/strtod.c -m3e -O2)
@@ -4853,15 +4934,16 @@ label:
;; TARGET_FMOVD is in effect, and mode switching is done before reload.
(define_insn "movsi_ie"
[(set (match_operand:SI 0 "general_movdst_operand"
- "=r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y")
+ "=r,r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y")
(match_operand:SI 1 "general_movsrc_operand"
- "Q,rI08,I20,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))]
+ "Q,r,I08,I20,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))]
"(TARGET_SH2E || TARGET_SH2A)
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
mov.l %1,%0
mov %1,%0
+ mov %1,%0
movi20 %1,%0
cmp/pl %1
mov.l %1,%0
@@ -4884,26 +4966,27 @@ label:
flds %1,fpul
fmov %1,%0
! move optimized away"
- [(set_attr "type" "pcload_si,move,move,*,load_si,mac_gp,prget,move,store,store,pstore,move,prset,load,pload,load,store,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil")
- (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*")
- (set_attr "length" "*,*,4,*,4,*,*,*,4,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
+ [(set_attr "type" "pcload_si,move,movi8,move,*,load_si,mac_gp,prget,arith,store,mac_mem,pstore,gp_mac,prset,mem_mac,pload,load,fstore,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil")
+ (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*")
+ (set_attr "length" "*,*,*,4,*,4,*,*,*,4,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
(define_insn "movsi_i_lowpart"
- [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,m,r"))
- (match_operand:SI 1 "general_movsrc_operand" "Q,rI08,mr,x,l,t,r,i"))]
+ [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,r,m,r"))
+ (match_operand:SI 1 "general_movsrc_operand" "Q,r,I08,mr,x,l,t,r,i"))]
"TARGET_SH1
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
mov.l %1,%0
mov %1,%0
+ mov %1,%0
mov.l %1,%0
sts %1,%0
sts %1,%0
movt %0
mov.l %1,%0
fake %1,%0"
- [(set_attr "type" "pcload,move,load,move,prget,move,store,pcload")])
+ [(set_attr "type" "pcload,move,arith,load,mac_gp,prget,arith,store,pcload")])
(define_insn_and_split "load_ra"
[(set (match_operand:SI 0 "general_movdst_operand" "")
@@ -5155,19 +5238,20 @@ label:
(set_attr "needs_delay_slot" "yes")])
(define_insn "movqi_i"
- [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l")
- (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))]
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m,r,r,l")
+ (match_operand:QI 1 "general_movsrc_operand" "r,i,m,r,t,l,r"))]
"TARGET_SH1
&& (arith_reg_operand (operands[0], QImode)
|| arith_reg_operand (operands[1], QImode))"
"@
mov %1,%0
+ mov %1,%0
mov.b %1,%0
mov.b %1,%0
movt %0
sts %1,%0
lds %1,%0"
- [(set_attr "type" "move,load,store,move,move,move")])
+ [(set_attr "type" "move,movi8,load,store,arith,prget,prset")])
(define_insn "*movqi_media"
[(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m")
@@ -5769,7 +5853,7 @@ label:
(if_then_else
(ne (symbol_ref "TARGET_SHCOMPACT") (const_int 0))
(const_int 10) (const_int 8))])
- (set_attr "type" "fmove,move,pcfload,fload,store,pcload,load,store,load,fload")
+ (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,fload")
(set_attr "late_fp_use" "*,*,*,*,yes,*,*,*,*,*")
(set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
(const_string "double")
@@ -6486,7 +6570,7 @@ label:
sts.l %1,%0
lds.l %1,%0
! move optimized away"
- [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,store,pcload,load,store,fmove,fmove,load,*,fpul_gp,gp_fpul,store,load,nil")
+ [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,fstore,pcload,load,store,fmove,fmove,load,*,fpul_gp,gp_fpul,fstore,load,nil")
(set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*")
(set_attr "length" "*,*,*,*,4,4,4,*,*,*,2,2,2,4,2,2,2,2,0")
(set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
@@ -9929,7 +10013,7 @@ mov.l\\t1f,r0\\n\\
sts fpscr,%0
sts.l fpscr,%0"
[(set_attr "length" "0,2,2,4,2,2,2,2,2")
- (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store,mac_gp,store")])
+ (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store,mac_gp,fstore")])
(define_peephole2
[(set (reg:PSI FPSCR_REG)
@@ -9980,7 +10064,7 @@ mov.l\\t1f,r0\\n\\
(xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))]
"(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fschg"
- [(set_attr "type" "fp") (set_attr "fp_set" "unknown")])
+ [(set_attr "type" "fpscr_toggle") (set_attr "fp_set" "unknown")])
;; There's no way we can use it today, since optimize mode switching
;; doesn't enable us to know from which mode we're switching to the
@@ -9992,7 +10076,7 @@ mov.l\\t1f,r0\\n\\
(xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))]
"TARGET_SH4A_FP && ! TARGET_FPU_SINGLE"
"fpchg"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "fpscr_toggle")])
(define_expand "addsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "")
@@ -10124,25 +10208,12 @@ mov.l\\t1f,r0\\n\\
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
-;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR
-;; register in feeding fp instructions. Thus, we cannot generate fmac for
-;; mixed-precision SH4 targets. To allow it to be still generated for the
-;; SH3E, we use a separate insn for SH3E mulsf3.
-
(define_expand "mulsf3"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "")
(mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
(match_operand:SF 2 "fp_arith_reg_operand" "")))]
"TARGET_SH2E || TARGET_SHMEDIA_FPU"
- "
-{
- if (TARGET_SH4 || TARGET_SH2A_SINGLE)
- expand_sf_binop (&gen_mulsf3_i4, operands);
- else if (TARGET_SH2E)
- emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2]));
- if (! TARGET_SHMEDIA)
- DONE;
-}")
+ "")
(define_insn "*mulsf3_media"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
@@ -10152,6 +10223,27 @@ mov.l\\t1f,r0\\n\\
"fmul.s %1, %2, %0"
[(set_attr "type" "fparith_media")])
+;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR
+;; register in feeding fp instructions. Thus, in order to generate fmac,
+;; we start out with a mulsf pattern that does not depend on fpscr.
+;; This is split after combine to introduce the dependency, in order to
+;; get mode switching and scheduling right.
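+;;
+;; As an illustration (hypothetical example, not from this patch), in
+;;   float madd (float a, float b, float c) { return a * b + c; }
+;; combine can only merge the multiply and the add into a single fmac
+;; while the mulsf insn does not yet use FPSCR.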
+(define_insn_and_split "mulsf3_ie"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E"
+ "fmul %2,%0"
+ "TARGET_SH4 || TARGET_SH2A_SINGLE"
+ [(const_int 0)]
+ "
+{
+ emit_insn (gen_mulsf3_i4 (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+ DONE;
+}"
+ [(set_attr "type" "fp")])
+
(define_insn "mulsf3_i4"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
@@ -10162,20 +10254,12 @@ mov.l\\t1f,r0\\n\\
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
-(define_insn "mulsf3_ie"
- [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
- (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
- (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
- "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
- "fmul %2,%0"
- [(set_attr "type" "fp")])
-
(define_insn "mac_media"
[(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
(match_operand:SF 2 "fp_arith_reg_operand" "f"))
(match_operand:SF 3 "fp_arith_reg_operand" "0")))]
- "TARGET_SHMEDIA_FPU"
+ "TARGET_SHMEDIA_FPU && TARGET_FMAC"
"fmac.s %1, %2, %0"
[(set_attr "type" "fparith_media")])
@@ -10185,7 +10269,7 @@ mov.l\\t1f,r0\\n\\
(match_operand:SF 2 "fp_arith_reg_operand" "f"))
(match_operand:SF 3 "arith_reg_operand" "0")))
(use (match_operand:PSI 4 "fpscr_operand" "c"))]
- "TARGET_SH2E && ! TARGET_SH4"
+ "TARGET_SH2E && TARGET_FMAC"
"fmac fr0,%2,%0"
[(set_attr "type" "fp")
(set_attr "fp_mode" "single")])
@@ -10336,7 +10420,7 @@ mov.l\\t1f,r0\\n\\
(match_operand:SF 1 "fp_arith_reg_operand" "f")))]
"TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/gt %1,%0"
- [(set_attr "type" "fp")
+ [(set_attr "type" "fp_cmp")
(set_attr "fp_mode" "single")])
(define_insn "cmpeqsf_t"
@@ -10345,7 +10429,7 @@ mov.l\\t1f,r0\\n\\
(match_operand:SF 1 "fp_arith_reg_operand" "f")))]
"TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/eq %1,%0"
- [(set_attr "type" "fp")
+ [(set_attr "type" "fp_cmp")
(set_attr "fp_mode" "single")])
(define_insn "ieee_ccmpeqsf_t"
@@ -10365,7 +10449,7 @@ mov.l\\t1f,r0\\n\\
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"(TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/gt %1,%0"
- [(set_attr "type" "fp")
+ [(set_attr "type" "fp_cmp")
(set_attr "fp_mode" "single")])
(define_insn "cmpeqsf_t_i4"
@@ -10375,7 +10459,7 @@ mov.l\\t1f,r0\\n\\
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"(TARGET_SH4 || TARGET_SH2A_SINGLE)"
"fcmp/eq %1,%0"
- [(set_attr "type" "fp")
+ [(set_attr "type" "fp_cmp")
(set_attr "fp_mode" "single")])
(define_insn "*ieee_ccmpeqsf_t_4"
@@ -10724,7 +10808,7 @@ mov.l\\t1f,r0\\n\\
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
"fmul %2,%0"
- [(set_attr "type" "dfp_arith")
+ [(set_attr "type" "dfp_mul")
(set_attr "fp_mode" "double")])
(define_expand "divdf3"
diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
index 7f9a87e95d9..161fdd8dcaf 100644
--- a/gcc/config/sh/sh.opt
+++ b/gcc/config/sh/sh.opt
@@ -57,11 +57,11 @@ Target RejectNegative Condition(SUPPORT_SH2A_NOFPU)
Generate SH2a FPU-less code
m2a-single
-Target RejectNegative Condition (SUPPORT_SH2A_SINGLE)
+Target RejectNegative Condition(SUPPORT_SH2A_SINGLE)
Generate default single-precision SH2a code
m2a-single-only
-Target RejectNegative Condition (SUPPORT_SH2A_SINGLE_ONLY)
+Target RejectNegative Condition(SUPPORT_SH2A_SINGLE_ONLY)
Generate only single-precision SH2a code
m2e
@@ -88,10 +88,33 @@ m4-200
Target RejectNegative Condition(SUPPORT_SH4)
Generate SH4-200 code
+;; TARGET_SH4_300 indicates whether we have the ST40-300 instruction set and
+;; pipeline, irrespective of the ABI.
+m4-300
+Target RejectNegative Condition(SUPPORT_SH4) Var(TARGET_SH4_300)
+Generate SH4-300 code
+
m4-nofpu
Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
Generate SH4 FPU-less code
+m4-100-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4-100 FPU-less code
+
+m4-200-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4-200 FPU-less code
+
+m4-300-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300) VarExists
+Generate SH4-300 FPU-less code
+
+m4-340
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300) VarExists
+Generate code for SH4 340 series (MMU/FPU-less)
+;; passes -isa=sh4-nommu-nofpu to the assembler.
+
m4-400
Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
Generate code for SH4 400 series (MMU/FPU-less)
@@ -114,6 +137,10 @@ m4-200-single
Target RejectNegative Condition(SUPPORT_SH4_SINGLE)
Generate default single-precision SH4-200 code
+m4-300-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE) Var(TARGET_SH4_300) VarExists
+Generate default single-precision SH4-300 code
+
m4-single-only
Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
Generate only single-precision SH4 code
@@ -126,6 +153,10 @@ m4-200-single-only
Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
Generate only single-precision SH4-200 code
+m4-300-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) Var(TARGET_SH4_300) VarExists
+Generate only single-precision SH4-300 code
+
m4a
Target RejectNegative Mask(SH4A) Condition(SUPPORT_SH4A)
Generate SH4a code
@@ -182,6 +213,22 @@ mbigtable
Target Report RejectNegative Mask(BIGTABLE)
Generate 32-bit offsets in switch tables
+mbranch-cost=
+Target RejectNegative Joined UInteger Var(sh_branch_cost) Init(-1)
+Cost to assume for a branch insn
+
+mcbranchdi
+Target Var(TARGET_CBRANCHDI4)
+Enable cbranchdi4 pattern
+
+mexpand-cbranchdi
+Target Var(TARGET_EXPAND_CBRANCHDI4)
+Expand cbranchdi4 pattern early into separate comparisons and branches
+
+mcmpeqdi
+Target Var(TARGET_CMPEQDI_T)
+Emit cmpeqdi_t pattern even when -mcbranchdi and -mexpand-cbranchdi are in effect
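+;; Illustrative usage (hypothetical command line):
+;;   sh-elf-gcc -mcbranchdi -mexpand-cbranchdi -mbranch-cost=2 foo.c
+;; enables the cbranchdi4 pattern, expands it early, and assumes a
+;; branch cost of 2.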
+
mcut2-workaround
Target RejectNegative Var(TARGET_SH5_CUT2_WORKAROUND)
Enable SH5 cut2 workaround
@@ -192,7 +239,7 @@ Align doubles at 64-bit boundaries
mdiv=
Target RejectNegative Joined Var(sh_div_str) Init("")
-Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp call-div1 call-fp call-table
+Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table
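+;; Usage sketch (illustrative): the value is joined to the option, as in
+;;   -mdiv=inv:minlat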
mdivsi3_libfunc=
Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
@@ -201,6 +248,10 @@ Specify name for 32 bit signed division function
mfmovd
Target RejectNegative Mask(FMOVD) Undocumented
+mfused-madd
+Target Var(TARGET_FMAC)
+Enable the use of the fused floating point multiply-accumulate operation
+
mgettrcost=
Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1)
Cost to assume for gettr insn
diff --git a/gcc/config/sh/sh1.md b/gcc/config/sh/sh1.md
index 9dfdd86508f..1198fe737b9 100644
--- a/gcc/config/sh/sh1.md
+++ b/gcc/config/sh/sh1.md
@@ -1,5 +1,5 @@
;; DFA scheduling description for Renesas / SuperH SH.
-;; Copyright (C) 2004 Free Software Foundation, Inc.
+;; Copyright (C) 2004, 2006 Free Software Foundation, Inc.
;; This file is part of GCC.
@@ -45,7 +45,7 @@
(define_insn_reservation "sh1_load_store" 2
(and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "load,pcload,pload,store,pstore"))
+ (eq_attr "type" "load,pcload,pload,mem_mac,store,fstore,pstore,mac_mem"))
"sh1memory*2")
(define_insn_reservation "sh1_arith3" 3
@@ -76,7 +76,7 @@
(define_insn_reservation "sh1_fp" 2
(and (eq_attr "pipe_model" "sh1")
- (eq_attr "type" "fp,fmove"))
+ (eq_attr "type" "fp,fpscr_toggle,fp_cmp,fmove"))
"sh1fp")
(define_insn_reservation "sh1_fdiv" 13
diff --git a/gcc/config/sh/sh4-300.md b/gcc/config/sh/sh4-300.md
new file mode 100644
index 00000000000..228782a67fc
--- /dev/null
+++ b/gcc/config/sh/sh4-300.md
@@ -0,0 +1,288 @@
+;; DFA scheduling description for ST40-300.
+;; Copyright (C) 2004, 2006 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; The following description models the ST40-300 pipeline using the
+;; DFA-based scheduler.
+
+;; Two automata are defined to reduce the number of states that a single
+;; large automaton would have (factoring).
+
+(define_automaton "sh4_300_inst_pipeline,sh4_300_fpu_pipe")
+
+;; This unit is basically the decode unit of the processor.
+;; Since the SH4 is a dual-issue machine, it is modelled as two
+;; units, so that any insn can be processed by either one of the
+;; decoding units.
+
+(define_cpu_unit "sh4_300_pipe_01,sh4_300_pipe_02" "sh4_300_inst_pipeline")
+
+;; The floating point units.
+
+(define_cpu_unit "sh4_300_fpt,sh4_300_fpu,sh4_300_fds" "sh4_300_fpu_pipe")
+
+;; The integer multiplier unit.
+
+(define_cpu_unit "sh4_300_mul" "sh4_300_inst_pipeline")
+
+;; LS unit
+
+(define_cpu_unit "sh4_300_ls" "sh4_300_inst_pipeline")
+
+;; The address calculator used for branch instructions.
+;; It is reserved at "issue" of a branch instruction, which makes
+;; sure that no two branch instructions can be issued in parallel.
+
+(define_cpu_unit "sh4_300_br" "sh4_300_inst_pipeline")
+
+;; ----------------------------------------------------
+;; This reservation is to simplify the dual issue description.
+
+(define_reservation "sh4_300_issue" "sh4_300_pipe_01|sh4_300_pipe_02")
+
+(define_reservation "all" "sh4_300_pipe_01+sh4_300_pipe_02")
+
+;;(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing")
+
+;; MOV RM,RN / MOV #imm8,RN / STS PR,RN
+(define_insn_reservation "sh4_300_mov" 0
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "move,movi8,prget"))
+ "sh4_300_issue")
+
+;; Fixed STS from MACL / MACH
+(define_insn_reservation "sh4_300_mac_gp" 0
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mac_gp"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; Fixed LDS to MACL / MACH
+(define_insn_reservation "sh4_300_gp_mac" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_mac"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; Instructions with latency 1 that have no specific resource requirements.
+
+(define_insn_reservation "sh4_300_simple_arith" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mt_group,arith,dyn_shift,prset"))
+ "sh4_300_issue")
+
+;; Load and store instructions have no alignment peculiarities for the ST40-300,
+;; but they use the load-store unit, which they share with the fmove type
+;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg).
+;; Loads have a latency of three.
+
+;; Load Store instructions.
+(define_insn_reservation "sh4_300_load" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "load,pcload,load_si,pcload_si,pload"))
+ "sh4_300_issue+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_mac_load" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mem_mac"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_mul")
+
+(define_insn_reservation "sh4_300_fload" 4
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fload,pcfload"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_fpt")
+
+;; sh_adjust_cost describes the reduced latency of insns that feed a store.
+;; The latency of an auto-increment register is 1; the latency of the memory
+;; output is not actually considered here anyway.
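+;; (Illustrative note: for a pre-decrement store such as "mov.l r1,@-r15",
+;; this means the updated r15 is available to the next insn after one
+;; cycle, while the memory write itself is not modelled as a result.)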
+(define_insn_reservation "sh4_300_store" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "store,pstore"))
+ "sh4_300_issue+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_fstore" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fstore"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_fpt")
+
+;; Fixed STS.L from MACL / MACH
+(define_insn_reservation "sh4_300_mac_store" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mac_mem"))
+ "sh4_300_issue+sh4_300_mul+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_gp_fpul" 2
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_fpul"))
+ "sh4_300_issue+sh4_300_fpt")
+
+(define_insn_reservation "sh4_300_fpul_gp" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fpul_gp"))
+ "sh4_300_issue+sh4_300_fpt")
+
+;; Branch (BF,BF/S,BT,BT/S,BRA)
+;; Branch Far (JMP,RTS,BRAF)
+;; Group: BR
+;; When the displacement is 0 for BF / BT, we effectively get conditional
+;; execution of one instruction, without pipeline disruption.
+;; Otherwise, the latency depends on prediction success.
+;; We can't really do much with the latency, even if we could express it,
+;; but the pairing restrictions are useful to take into account.
+;; ??? If the branch is likely, and not paired with a preceding insn,
+;; or likely but likely not predicted, we might want to fill the delay slot.
+;; However, there appears to be no machinery to make the compiler
+;; recognize these scenarios.
+
+(define_insn_reservation "sh4_300_branch" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "cbranch,jump,return,jump_ind"))
+ "sh4_300_issue+sh4_300_br")
+
+;; RTE
+(define_insn_reservation "sh4_300_return_from_exp" 9
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "rte"))
+ "sh4_300_pipe_01+sh4_300_pipe_02*9")
+
+;; OCBP, OCBWB
+;; Group: CO
+;; Latency: 1-5
+;; Issue Rate: 1
+
+;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2
+;; This description is likely inexact, but this pattern should not actually
+;; appear when compiling for sh4-300; we should use isbi instead.
+;; If a -mtune option is added later, we should use the icache array
+;; dispatch method instead.
+(define_insn_reservation "sh4_300_ocbwb" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "cwb"))
+ "all*3")
+
+;; JSR,BSR,BSRF
+;; Calls have a mandatory delay slot, which we'd like to fill with an insn
+;; that can be paired with the call itself.
+;; Scheduling runs before reorg, so we approximate this by saying that we
+;; want the call to be paired with a preceding insn.
+;; In most cases, the insn that loads the address of the call should have
+;; a non-zero latency (mov rn,rm doesn't make sense since we could use rn
+;; for the address then). Thus, a preceding insn that can be paired with
+;; a call should be eligible for the delay slot.
+;;
+;; Calls introduce a longish delay that is likely to flush the pipelines
+;; of the caller's instructions.
+;; load to restore a register (in the delay slot of rts), while sfuncs
+;; tend to end with an EX or MT insn. But that is not actually relevant,
+;; since there are no instructions that contend for memory access early.
+;; We could, of course, provide exact scheduling information for specific
+;; sfuncs, if that should prove useful.
+
+(define_insn_reservation "sh4_300_call" 16
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "call,sfunc"))
+ "sh4_300_issue+sh4_300_br,all*15")
+
+;; FMOV.S / FMOV.D
+(define_insn_reservation "sh4_300_fmov" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fmove"))
+ "sh4_300_issue+sh4_300_fpt")
+
+;; LDS to FPSCR
+(define_insn_reservation "sh4_300_fpscr_load" 8
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_fpscr"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt")
+
+;; LDS.L to FPSCR
+(define_insn_reservation "sh4_300_fpscr_load_mem" 8
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mem_fpscr"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt+sh4_300_ls")
+
+
+;; Fixed point multiplication (DMULS.L, DMULU.L, MUL.L, MULS.W, MULU.W)
+(define_insn_reservation "multi" 2
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "smpy,dmpy"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; FPCHG, FRCHG, FSCHG
+(define_insn_reservation "fpscr_toggle" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fpscr_toggle"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt")
+
+;; FCMP/EQ, FCMP/GT
+(define_insn_reservation "fp_cmp" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fp_cmp,dfp_cmp"))
+ "sh4_300_issue+sh4_300_fpu")
+
+;; Single precision floating point (FADD,FLOAT,FMAC,FMUL,FSUB,FTRC)
+;; Double-precision floating-point (FADD,FCNVDS,FCNVSD,FLOAT,FSUB,FTRC)
+(define_insn_reservation "fp_arith" 6
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fp,ftrc_s,dfp_arith,dfp_conv"))
+ "sh4_300_issue+sh4_300_fpu")
+
+;; Single Precision FDIV/SQRT
+(define_insn_reservation "fp_div" 19
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fdiv"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*15")
+
+;; Double-precision floating-point FMUL
+(define_insn_reservation "dfp_mul" 9
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "dfp_mul"))
+ "sh4_300_issue+sh4_300_fpu,sh4_300_fpu*3")
+
+;; Double precision FDIV/SQRT
+(define_insn_reservation "dp_div" 35
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "dfdiv"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*31")
+
+
+;; ??? We don't really want these for sh4-300.
+;; This pattern itself is likely to finish in 3 cycles, but it is also
+;; likely to disrupt branch prediction for taken branches for the
+;; following condbranch.
+(define_insn_reservation "sh4_300_arith3" 5
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "arith3"))
+ "sh4_300_issue,all*4")
+
+;; arith3b insns without branch redirection make use of the 0-offset 0-latency
+;; branch feature, and thus schedule the same whether or not the branch is
+;; taken.  If the branch is redirected, the taken branch might take longer,
+;; but then, we don't have to take the next branch.
+;; ??? Should we suppress branch redirection for sh4-300 to improve branch
+;; target hit rates?
+(define_insn_reservation "arith3b" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "arith3"))
+ "issue,all")
diff --git a/gcc/config/sh/sh4.md b/gcc/config/sh/sh4.md
index 0937db8e6a3..b390ab99d05 100644
--- a/gcc/config/sh/sh4.md
+++ b/gcc/config/sh/sh4.md
@@ -1,5 +1,5 @@
;; DFA scheduling description for SH4.
-;; Copyright (C) 2004 Free Software Foundation, Inc.
+;; Copyright (C) 2004, 2006 Free Software Foundation, Inc.
;; This file is part of GCC.
@@ -209,9 +209,14 @@
(define_insn_reservation "sh4_store" 1
(and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "store"))
+ (eq_attr "type" "store,fstore"))
"issue+load_store,nothing,memory")
+(define_insn_reservation "mac_mem" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mac_mem"))
+ "d_lock,nothing,memory")
+
;; Load Store instructions.
;; Group: LS
;; Latency: 1
@@ -372,35 +377,42 @@
;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
;; Group: CO
;; Latency: 4 / 4
-;; Issue Rate: 1
+;; Issue Rate: 2
(define_insn_reservation "multi" 4
(and (eq_attr "pipe_model" "sh4")
(eq_attr "type" "smpy,dmpy"))
"d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2")
-;; Fixed STS from MACL / MACH
+;; Fixed STS from, and LDS to MACL / MACH
;; Group: CO
;; Latency: 3
;; Issue Rate: 1
(define_insn_reservation "sh4_mac_gp" 3
(and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "mac_gp"))
+ (eq_attr "type" "mac_gp,gp_mac,mem_mac"))
"d_lock")
;; Single precision floating point computation FCMP/EQ,
-;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRVHG, FSCHG
+;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
;; Group: FE
;; Latency: 3/4
;; Issue Rate: 1
(define_insn_reservation "fp_arith" 3
(and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "fp"))
+ (eq_attr "type" "fp,fp_cmp"))
"issue,F01,F2")
+;; We don't model the resource usage of this exactly because that would
+;; introduce a bogus latency.
+(define_insn_reservation "sh4_fpscr_toggle" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fpscr_toggle"))
+ "issue")
+
(define_insn_reservation "fp_arith_ftrc" 3
(and (eq_attr "pipe_model" "sh4")
(eq_attr "type" "ftrc_s"))
@@ -437,7 +449,7 @@
(define_insn_reservation "fp_double_arith" 8
(and (eq_attr "pipe_model" "sh4")
- (eq_attr "type" "dfp_arith"))
+ (eq_attr "type" "dfp_arith,dfp_mul"))
"issue,F01,F1+F2,fpu*4,F2")
;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
diff --git a/gcc/config/sh/sh4a.md b/gcc/config/sh/sh4a.md
index 163a4e10d85..602c6545ae9 100644
--- a/gcc/config/sh/sh4a.md
+++ b/gcc/config/sh/sh4a.md
@@ -1,5 +1,5 @@
;; Scheduling description for Renesas SH4a
-;; Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+;; Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
@@ -98,9 +98,11 @@
;; MOV
;; Group: MT
;; Latency: 0
+;; ??? Not sure whether movi8 belongs here, but that is where it was
+;; effectively before.
(define_insn_reservation "sh4a_mov" 0
(and (eq_attr "cpu" "sh4a")
- (eq_attr "type" "move"))
+ (eq_attr "type" "move,movi8,gp_mac"))
"ID_or")
;; Load
@@ -108,7 +110,7 @@
;; Latency: 3
(define_insn_reservation "sh4a_load" 3
(and (eq_attr "cpu" "sh4a")
- (eq_attr "type" "load,pcload"))
+ (eq_attr "type" "load,pcload,mem_mac"))
"sh4a_ls+sh4a_memory")
(define_insn_reservation "sh4a_load_si" 3
@@ -121,7 +123,7 @@
;; Latency: 0
(define_insn_reservation "sh4a_store" 0
(and (eq_attr "cpu" "sh4a")
- (eq_attr "type" "store"))
+ (eq_attr "type" "store,fstore,mac_mem"))
"sh4a_ls+sh4a_memory")
;; CWB TYPE
@@ -177,7 +179,7 @@
;; Latency: 3
(define_insn_reservation "sh4a_fp_arith" 3
(and (eq_attr "cpu" "sh4a")
- (eq_attr "type" "fp"))
+ (eq_attr "type" "fp,fp_cmp,fpscr_toggle"))
"ID_or,sh4a_fex")
(define_insn_reservation "sh4a_fp_arith_ftrc" 3
@@ -207,7 +209,7 @@
;; Latency: 5
(define_insn_reservation "sh4a_fp_double_arith" 5
(and (eq_attr "cpu" "sh4a")
- (eq_attr "type" "dfp_arith"))
+ (eq_attr "type" "dfp_arith,dfp_mul"))
"ID_or,sh4a_fex*3")
;; Double precision FDIV/SQRT
diff --git a/gcc/config/sh/superh.h b/gcc/config/sh/superh.h
index 49bb6206d43..65154926e33 100644
--- a/gcc/config/sh/superh.h
+++ b/gcc/config/sh/superh.h
@@ -75,17 +75,17 @@ Boston, MA 02110-1301, USA. */
on newlib and provide the runtime support */
#undef SUBTARGET_CPP_SPEC
#define SUBTARGET_CPP_SPEC \
-"-D__EMBEDDED_CROSS__ %{m4-100*:-D__SH4_100__} %{m4-200*:-D__SH4_200__} %{m4-400:-D__SH4_400__} %{m4-500:-D__SH4_500__} \
+"-D__EMBEDDED_CROSS__ %{m4-100*:-D__SH4_100__} %{m4-200*:-D__SH4_200__} %{m4-300*:-D__SH4_300__} %{m4-340:-D__SH4_340__} %{m4-400:-D__SH4_400__} %{m4-500:-D__SH4_500__} \
%(cppruntime)"
/* Override the SUBTARGET_ASM_SPEC to add the runtime support */
#undef SUBTARGET_ASM_SPEC
-#define SUBTARGET_ASM_SPEC "%{m4-100*|m4-200*:-isa=sh4} %{m4-400:-isa=sh4-nommu-nofpu} %{m4-500:-isa=sh4-nofpu} %(asruntime)"
+#define SUBTARGET_ASM_SPEC "%{m4-100*|m4-200*:-isa=sh4} %{m4-400|m4-340:-isa=sh4-nommu-nofpu} %{m4-500:-isa=sh4-nofpu} %(asruntime)"
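+/* For example (illustrative): with the spec above, -m4-340 makes the
+   driver pass -isa=sh4-nommu-nofpu to the assembler, just like -m4-400.  */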
/* Override the SUBTARGET_ASM_RELAX_SPEC so it doesn't interfere with the
runtime support by adding -isa=sh4 in the wrong place. */
#undef SUBTARGET_ASM_RELAX_SPEC
-#define SUBTARGET_ASM_RELAX_SPEC "%{!m4-100*:%{!m4-200*:%{!m4-400:%{!m4-500:-isa=sh4}}}}"
+#define SUBTARGET_ASM_RELAX_SPEC "%{!m4-100*:%{!m4-200*:%{!m4-300*:%{!m4-340:%{!m4-400:%{!m4-500:-isa=sh4}}}}}}"
/* Create the CC1_SPEC to add the runtime support */
#undef CC1_SPEC
@@ -102,7 +102,7 @@ Boston, MA 02110-1301, USA. */
/* Override STARTFILE_SPEC to add profiling and MMU support. */
#undef STARTFILE_SPEC
#define STARTFILE_SPEC \
- "%{!shared: %{!m4-400*: %{pg:gcrt1-mmu.o%s}%{!pg:crt1-mmu.o%s}}} \
- %{!shared: %{m4-400*: %{pg:gcrt1.o%s}%{!pg:crt1.o%s}}} \
+ "%{!shared: %{!m4-400*:%{!m4-340*: %{pg:gcrt1-mmu.o%s}%{!pg:crt1-mmu.o%s}}}} \
+ %{!shared: %{m4-340*|m4-400*: %{pg:gcrt1.o%s}%{!pg:crt1.o%s}}} \
crti.o%s \
%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh
index 3ebc09d6e3c..56b6ba1c55a 100644
--- a/gcc/config/sh/t-sh
+++ b/gcc/config/sh/t-sh
@@ -38,11 +38,12 @@ MULTILIB_DIRNAMES=
# is why sh2a and sh2a-single need their own multilibs.
MULTILIB_MATCHES = $(shell \
multilibs="$(MULTILIB_OPTIONS)" ; \
- for abi in m1,m2,m3,m4-nofpu,m4-400,m4-500,m4al,m4a-nofpu m1,m2,m2a-nofpu \
- m2e,m3e,m4-single-only,m4-100-single-only,m4-200-single-only,m4a-single-only \
+ for abi in m1,m2,m3,m4-nofpu,m4-100-nofpu,m4-200-nofpu,m4-400,m4-500,m4-340,m4-300-nofpu,m4al,m4a-nofpu \
+ m1,m2,m2a-nofpu \
+ m2e,m3e,m4-single-only,m4-100-single-only,m4-200-single-only,m4-300-single-only,m4a-single-only \
m2e,m2a-single-only \
- m4-single,m4-100-single,m4-200-single,m4a-single \
- m4,m4-100,m4-200,m4a \
+ m4-single,m4-100-single,m4-200-single,m4-300-single,m4a-single \
+ m4,m4-100,m4-200,m4-300,m4a \
m5-32media,m5-compact,m5-32media \
m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \
subst= ; \
@@ -76,7 +77,7 @@ gt-sh.h : s-gtype ; @true
IC_EXTRA_PARTS= libic_invalidate_array_4-100.a libic_invalidate_array_4-200.a \
libic_invalidate_array_4a.a
-OPT_EXTRA_PARTS= libgcc-Os-4-200.a
+OPT_EXTRA_PARTS= libgcc-Os-4-200.a libgcc-4-300.a
EXTRA_MULTILIB_PARTS= $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS)
$(T)ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES)
@@ -104,6 +105,12 @@ OBJS_Os_4_200=$(T)sdivsi3_i4i-Os-4-200.o $(T)udivsi3_i4i-Os-4-200.o $(T)unwind-d
$(T)libgcc-Os-4-200.a: $(OBJS_Os_4_200) $(GCC_PASSES)
$(AR_CREATE_FOR_TARGET) $@ $(OBJS_Os_4_200)
+$(T)div_table-4-300.o: $(srcdir)/config/sh/lib1funcs-4-300.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_div_table -x assembler-with-cpp $<
+
+$(T)libgcc-4-300.a: $(T)div_table-4-300.o $(GCC_PASSES)
+ $(AR_CREATE_FOR_TARGET) $@ $(T)div_table-4-300.o
+
# Local Variables:
# mode: Makefile
# End: